diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..7a2ab155 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,7 @@ +# top-most EditorConfig file +root = true + +[*.{cpp,h,hpp,c,amlg}] +indent_style = tab +indent_size = 4 +trim_trailing_whitespace = true diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..2cb48cb8 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,6 @@ +########################################## +# code ownership +########################################## + +# default ownership: default owners for everything in the repo (Unless a later match takes precedence) +* @howsoai/devs diff --git a/.github/workflows/build-test-package.yml b/.github/workflows/build-test-package.yml new file mode 100644 index 00000000..537d28c9 --- /dev/null +++ b/.github/workflows/build-test-package.yml @@ -0,0 +1,300 @@ +name: Reusable WF - Build + +on: + workflow_call: + inputs: + version: + required: true + type: string + +defaults: + run: + shell: bash + +jobs: + + build-linux: + runs-on: ubuntu-20.04 + container: + image: ghcr.io/howsoai/amalgam-build-container-linux:0.13.0 + credentials: + username: ${{ github.actor }} + password: ${{ github.token }} + strategy: + matrix: + preset: + - arch: amd64 + platform: linux + - arch: arm64 + platform: linux + - arch: arm64_8a + platform: linux + - arch: wasm64 + platform: linux + permissions: + contents: write + + steps: + - uses: actions/checkout@v3 + + - name: Set build preset + run: | + PRESET=$(echo ${{ matrix.preset.arch }}-release-${{ matrix.preset.platform }}) + echo "PRESET=$(echo $PRESET)" >> $GITHUB_ENV + echo "Build preset: $PRESET" + + - name: CMake Configure + run: AMALGAM_BUILD_VERSION=${{ inputs.version }} cmake --preset $PRESET + + - name: CMake Build + run: cmake --build --preset $PRESET -- --quiet + + - name: CMake Test + run: cmake --build --preset $PRESET --target test + + - name: CMake Install + run: cmake --build --preset $PRESET --target install + + - name: CMake Package + run: cmake --build --preset $PRESET --target package + + - name: Upload Artifact + uses: actions/upload-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-${{ matrix.preset.platform }}-${{ matrix.preset.arch }} + path: ./out/package/amalgam-*.tar.gz + if-no-files-found: error + + build-macos: + runs-on: macos-11 + strategy: + matrix: + preset: + - arch: amd64 + build: release + platform: macos + - arch: arm64 + build: release + platform: macos + permissions: + contents: write + + steps: + - uses: actions/checkout@v3 + + - name: Set build preset + run: | + PRESET=$(echo ${{ matrix.preset.arch }}-release-${{ matrix.preset.platform }}) + echo "PRESET=$(echo $PRESET)" >> $GITHUB_ENV + echo "Build preset: $PRESET" + + - name: Install build dependencies + run: | + if [ "${{ matrix.preset.arch }}" = "arm64" ]; then + brew uninstall --ignore-dependencies libomp + brew cleanup -s + rm -rf `brew --cache` + brew fetch --force --bottle-tag=arm64_big_sur libomp + brew install `brew --cache --bottle-tag=arm64_big_sur libomp` + else + brew install libomp + fi + brew list --versions libomp + brew install ninja + + - name: CMake Configure + run: AMALGAM_BUILD_VERSION=${{ inputs.version }} cmake --preset $PRESET + + - name: CMake Build + run: cmake --build --preset $PRESET -- --quiet + + - name: CMake Test + run: cmake --build --preset $PRESET --target test + + - name: CMake Install + run: cmake --build --preset $PRESET --target install + + - name: CMake Package + run: cmake --build --preset $PRESET --target 
package + + - name: Upload Artifact + uses: actions/upload-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-darwin-${{ matrix.preset.arch }} + path: ./out/package/amalgam-*.tar.gz + if-no-files-found: error + + build-windows: + runs-on: windows-2022 + strategy: + matrix: + preset: + - arch: amd64 + build: release + platform: windows + permissions: + contents: write + + steps: + - uses: actions/checkout@v3 + + - name: Set build preset + run: | + PRESET=$(echo ${{ matrix.preset.arch }}-release-${{ matrix.preset.platform }}) + echo "PRESET=$(echo $PRESET)" >> $GITHUB_ENV + echo "Build preset: $PRESET" + + - name: Enable developer commandline tools + uses: ilammy/msvc-dev-cmd@v1 + + - name: Download tz data + shell: pwsh + run: ./build/powershell/Download-Tzdata.ps1 + + - name: Install build dependencies + run: choco upgrade ninja + + - name: CMake Configure + run: AMALGAM_BUILD_VERSION=${{ inputs.version }} cmake --preset $PRESET + + - name: CMake Build + run: cmake --build --preset $PRESET -- --quiet + + - name: CMake Test + run: cmake --build --preset $PRESET --target test + + - name: CMake Install + run: cmake --build --preset $PRESET --target install + + - name: CMake Package + run: cmake --build --preset $PRESET --target package + + - name: Upload Artifact + uses: actions/upload-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-${{ matrix.preset.platform }}-${{ matrix.preset.arch }} + path: ./out/package/amalgam-*.tar.gz + if-no-files-found: error + + smoke-test-linux-amd64: + needs: ['build-linux'] + runs-on: ubuntu-latest + steps: + - name: Download Artifact + uses: actions/download-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-linux-amd64 + + - name: Extract Amalgam + run: | + mkdir ./amalgam + tar -xvf ./amalgam-${{ inputs.version }}-linux-amd64.tar.gz -C ./amalgam + + - name: Smoke test + run: | + set -e + BIN=./amalgam/bin + echo -n "amalgam: " && $BIN/amalgam --version + echo -n "amalgam-mt: " && $BIN/amalgam-mt --version + echo -n "amalgam-mt-noavx: " && $BIN/amalgam-mt-noavx --version + echo -n "amalgam-st: " && $BIN/amalgam-st --version + echo -n "amalgam-omp: " && $BIN/amalgam-omp --version + + smoke-test-linux-arm64: + needs: ['build-linux'] + runs-on: ubuntu-latest + steps: + - name: Download Artifact + uses: actions/download-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-linux-arm64 + + - name: Extract Amalgam + run: | + mkdir ./amalgam + tar -xvf ./amalgam-${{ inputs.version }}-linux-arm64.tar.gz -C ./amalgam + + - name: Smoke test + uses: pguyot/arm-runner-action@v2 + with: + base_image: raspios_lite_arm64:latest + cpu: cortex-a8 + commands: | + set -e + PATH=$PATH:/usr/aarch64-linux-gnu + BIN=./amalgam/bin + echo -n "amalgam: " && $BIN/amalgam --version + echo -n "amalgam-mt: " && $BIN/amalgam-mt --version + echo -n "amalgam-st: " && $BIN/amalgam-st --version + echo -n "amalgam-omp: " && $BIN/amalgam-omp --version + + smoke-test-linux-arm64_8a: + needs: ['build-linux'] + runs-on: ubuntu-latest + steps: + - name: Download Artifact + uses: actions/download-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-linux-arm64_8a + + - name: Extract Amalgam + run: | + mkdir ./amalgam + tar -xvf ./amalgam-${{ inputs.version }}-linux-arm64_8a.tar.gz -C ./amalgam + + - name: Smoke test + uses: pguyot/arm-runner-action@v2 + with: + base_image: raspios_lite_arm64:latest + cpu: cortex-a7 + commands: | + set -e + PATH=$PATH:/usr/aarch64-linux-gnu + BIN=./amalgam/bin + echo -n "amalgam: " && $BIN/amalgam --version + echo -n "amalgam-st: " && 
$BIN/amalgam-st --version + + smoke-test-macos-amd64: + needs: ['build-macos'] + runs-on: macos-latest + steps: + - name: Download Artifact + uses: actions/download-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-darwin-amd64 + + - name: Extract Amalgam + run: | + mkdir ./amalgam + tar -xvf ./amalgam-${{ inputs.version }}-darwin-amd64.tar.gz -C ./amalgam + + # GitHub macos runner does not support AVX + - name: Smoke test + run: | + set -e + BIN=./amalgam/bin + echo -n "amalgam-mt-noavx: " && $BIN/amalgam-mt-noavx --version + + smoke-test-windows-amd64: + needs: ['build-windows'] + runs-on: windows-latest + steps: + - name: Download Artifact + uses: actions/download-artifact@v3 + with: + name: amalgam-${{ inputs.version }}-windows-amd64 + + - name: Extract Amalgam + run: | + mkdir ./amalgam + tar -xvf ./amalgam-${{ inputs.version }}-windows-amd64.tar.gz -C ./amalgam + + - name: Smoke test + run: | + set -e + BIN=./amalgam/bin + echo -n "amalgam: " && $BIN/amalgam --version + echo -n "amalgam-mt: " && $BIN/amalgam-mt --version + echo -n "amalgam-mt-noavx: " && $BIN/amalgam-mt-noavx --version + echo -n "amalgam-st: " && $BIN/amalgam-st --version + echo -n "amalgam-omp: " && $BIN/amalgam-omp --version diff --git a/.github/workflows/create-branch-build.yml b/.github/workflows/create-branch-build.yml new file mode 100644 index 00000000..63633ec0 --- /dev/null +++ b/.github/workflows/create-branch-build.yml @@ -0,0 +1,59 @@ +name: Create Branch Build +run-name: "Branch Build (${{ github.run_attempt }}.${{ github.run_number }}) - ${{ github.ref_name }}" + +on: + workflow_dispatch: + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + set-branch-version: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.set-branch-version.outputs.version }} + steps: + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get previous git tag + id: previous-tag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: 0.0.0 + + - name: Get next semver from previous tag + id: next-semvers + uses: WyriHaximus/github-action-next-semvers@v1 + with: + version: ${{ steps.previous-tag.outputs.tag }} + + - name: Set Branch version + id: set-branch-version + run: | + BRANCH_ITERATION=${{ github.run_attempt }}.${{ github.run_number }} + echo "version=$(echo ${{ steps.next-semvers.outputs.patch }}-alpha+BR.${{ github.ref_name }}.${BRANCH_ITERATION})" >> $GITHUB_OUTPUT + + build-test-package: + needs: ['set-branch-version'] + uses: "./.github/workflows/build-test-package.yml" + secrets: inherit + with: + version: ${{ needs.set-branch-version.outputs.version }} + + # This job is here to have only one final step to add for "Status Checks" + # in GitHub, instead of adding every leaf test from 'build-test-package' + final-check: + needs: ['build-test-package'] + if: always() && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) + runs-on: ubuntu-latest + steps: + - run: exit 1 diff --git a/.github/workflows/create-pr-build.yml b/.github/workflows/create-pr-build.yml new file mode 100644 index 00000000..b84cebe5 --- /dev/null +++ b/.github/workflows/create-pr-build.yml @@ -0,0 +1,62 @@ +name: Create PR Build +run-name: "PR Build: #${{ github.event.pull_request.number }} (${{ github.run_attempt }}.${{ github.run_number }}) - ${{ github.event.pull_request.title }}" + +on: + pull_request: + branches: + - 'main' + +defaults: + run: + shell: bash + +concurrency: + group: ${{ 
github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + set-pr-version: + runs-on: ubuntu-latest + outputs: + version: ${{ steps.set-pr-version.outputs.version }} + steps: + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get previous git tag + id: previous-tag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: 0.0.0 + + - name: Get next semver from previous tag + id: next-semvers + uses: WyriHaximus/github-action-next-semvers@v1 + with: + version: ${{ steps.previous-tag.outputs.tag }} + + - name: Set PR version + id: set-pr-version + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + PR_ITERATION=${{ github.run_attempt }}.${{ github.run_number }} + echo "version=$(echo ${{ steps.next-semvers.outputs.patch }}-alpha+PR.${PR_NUMBER}.${PR_ITERATION})" >> $GITHUB_OUTPUT + + build-test-package: + needs: ['set-pr-version'] + uses: "./.github/workflows/build-test-package.yml" + secrets: inherit + with: + version: ${{ needs.set-pr-version.outputs.version }} + + # This job is here to have only one final step to add for "Status Checks" + # in GitHub, instead of adding every leaf test from 'build-test-package' + final-check: + needs: ['build-test-package'] + if: always() && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) + runs-on: ubuntu-latest + steps: + - run: exit 1 diff --git a/.github/workflows/create-release-build.yml b/.github/workflows/create-release-build.yml new file mode 100644 index 00000000..f96afff0 --- /dev/null +++ b/.github/workflows/create-release-build.yml @@ -0,0 +1,116 @@ +name: Create Release Build +run-name: "Release Build" + +on: + workflow_dispatch: + inputs: + optional-release-tag: + description: "Optional release tag (if empty, will search previous commit titles for MAJOR/MINOR and autoincrement latest tag accordingly)" + required: false + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + + construct-release-tag: + runs-on: ubuntu-latest + outputs: + release-tag: ${{ steps.construct-release-tag.outputs.release-tag }} + steps: + + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Get previous git tag + id: previous-tag + uses: WyriHaximus/github-action-get-previous-tag@v1 + with: + fallback: 0.0.0 + + - name: Get next semver from previous tag + id: next-semvers + uses: WyriHaximus/github-action-next-semvers@v1 + with: + version: ${{ steps.previous-tag.outputs.tag }} + + - name: Construct release tag + id: construct-release-tag + run: | + RELEASE_TAG="" + + # Check for null tag: + if [[ "${{ inputs.optional-release-tag }}" = "0.0.0" || (-z "${{ inputs.optional-release-tag }}" && "${{ steps.previous-tag.outputs.tag }}" = "0.0.0") ]]; then + echo "❌ - Null tag (0.0.0) is invalid for a release (hint: repo could contain no tags)" + exit 1 + fi + + # If tag not given by user, deduce from commit titles since last tag: + if test -z "${{ inputs.optional-release-tag }}"; then + echo "Autoincrementing version based on previous commit titles since last tag" + + # Search previous commits for MAJOR/MINOR text tokens: + IS_MAJOR_BUMP=false + IS_MINOR_BUMP=false + echo "Checking commit titles since last tag: '${{ steps.previous-tag.outputs.tag }}'..." 
+ COMMITS=$(git log --pretty=format:%s ${{ steps.previous-tag.outputs.tag }}..@) + while read commit + do + if [[ $commit == *"MAJOR"* ]]; then + echo -e "\tMajor: title='$commit'" + IS_MAJOR_BUMP=true + elif [[ $commit == *"MINOR"* ]]; then + echo -e "\tMinor: title='$commit'" + IS_MINOR_BUMP=true + else + echo -e "\tPatch: title='$commit'" + fi + done <<< "$COMMITS" + + # Set version according to what was found in commit titles: + if [ "$IS_MAJOR_BUMP" = true ]; then + echo "Bumping major version" + RELEASE_TAG=${{ steps.next-semvers.outputs.major }} + elif [ "$IS_MINOR_BUMP" = true ]; then + echo "Bumping minor version" + RELEASE_TAG=${{ steps.next-semvers.outputs.minor }} + else + # If no major/minor found, treat as patch: + echo "Bumping patch version" + RELEASE_TAG=${{ steps.next-semvers.outputs.patch }} + fi + else + echo "Using user provided release tag" + RELEASE_TAG=${{ inputs.optional-release-tag }} + fi + + # Check if valid semver: + regex='^([0-9]+\.){2}(\*|[0-9]+)(-.*)?$' + if [[ ! "$RELEASE_TAG" =~ $regex ]]; then + echo "❌ - Release tag is not a valid semver: $RELEASE_TAG" + exit 1 + fi + echo "✔ - Release tag is a valid semver" + + # Check if tag already exists: + if git rev-parse "$RELEASE_TAG" >/dev/null 2>&1; then + echo "❌ - Release tag already exists: $RELEASE_TAG" + exit 1 + fi + echo "✔ - Release tag does not exist" + + echo "Release tag: $RELEASE_TAG" + echo "release-tag=$(echo $RELEASE_TAG)" >> $GITHUB_OUTPUT + + release: + needs: ['construct-release-tag'] + uses: "./.github/workflows/release.yml" + secrets: inherit + with: + version: ${{ needs.construct-release-tag.outputs.release-tag }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..de19c789 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,40 @@ +name: Reusable WF - Release + +on: + workflow_call: + inputs: + version: + required: true + type: string + +defaults: + run: + shell: bash + +jobs: + + build-test-package: + uses: "./.github/workflows/build-test-package.yml" + secrets: inherit + with: + version: ${{ inputs.version }} + + create-release: + needs: ['build-test-package'] + runs-on: ubuntu-latest + steps: + + - name: Download Artifacts + uses: actions/download-artifact@v3 + + - name: Create Release + uses: ncipollo/release-action@v1 + with: + tag: ${{ inputs.version }} + commit: ${{ github.sha }} + name: "Amalgam ${{ inputs.version }}" + artifactErrorsFailBuild: true + generateReleaseNotes: true + makeLatest: legacy + artifacts: amalgam-*/amalgam-*.tar.gz + artifactContentType: application/gzip diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..a9d6a723 --- /dev/null +++ b/.gitignore @@ -0,0 +1,46 @@ +## Use glob syntax +syntax: glob + +## Files created by build +src/Amalgam/AmalgamVersion.h + +## User-specific files +*.suo +*.user +*.sln.docstates +.vs/ +.vscode/ +.direnv +.envrc +.env + +## Amalgam metadata files +*.mdam + +## Build results +out/ +x64/ + +## Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile +*.opendb +*.VC.db + +## Visual Studio profiler +*.psess +*.vsp +*.vspx + +## Windows Ignores +$RECYCLE.BIN/ +Thumbs.db +ehthumbs.db +Desktop.ini + +## Mac Ignores +.DS_Store \ No newline at end of file diff --git a/AMALGAM-BEGINNER-GUIDE.md b/AMALGAM-BEGINNER-GUIDE.md new file mode 100644 index 00000000..e4de92ca --- /dev/null +++ b/AMALGAM-BEGINNER-GUIDE.md @@ -0,0 +1,1094 @@ +# Amalgam Beginner Guide + +This is a colloquial guide for beginners to get started programming with 
Amalgam. It assumes some familiarity with programming. For detailed documentation on the language, see the [Amalgam Language Reference](https://howsoai.github.io/amalgam).
+
+## Amalgam : code-is-data-is-code
+
+Amalgam uses [S-expressions](https://en.wikipedia.org/wiki/S-expression) as its operators, which are a pair of parentheses surrounding an opcode and its parameters.
+One way to think about this is: every operator *is* a function. So when you see `(+ 2 1)`, you can read that
+as "add two and one", or "call the function named '+' with
+parameters of 2 and 1".
+Since the scope of all operators and operations is explicit, there is no ambiguity with regard to order of operations.
+
+Examples:
+
+`(+ 2 3)`
+> 5
+`(- 7 1)`
+> 6
+`(- 1 7)`
+> -6
+`(* 4 3)`
+> 12
+`(/ 16 2)`
+> 8
+`(+ 1 2 3 4 5)`
+> 15
+`(* 2 5 (+ 3 4))`
+> 70
+
+> #Python
+> print("hello world")
+>
+> ;Amalgam
+> (print "hello world")
+
+For a list of all operations, see the [Amalgam Language Reference](https://howsoai.github.io/amalgam).
+
+
+# Scripting
+Amalgam is an interpreted (scripting) language. To run Amalgam files, typically with a *.amlg* extension,
+you can simply use the Amalgam binary to run a script, for example if your script is named *my_script.amlg*:
+
+`/path/to/bin/amalgam my_script.amlg`
+
+Due to the syntax of `#!` being a private labeled variable, the traditional Unix [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)) with the Amalgam interpreter works as expected.
+
+The first outermost operation will be executed in the script, thus most standalone script code should be in
+a `(seq` block, since that is a single function that executes everything in it sequentially.
+Example contents of a script file:
+
+```
+(seq
+	(print "hello ")
+	(* 2 2)
+	(print "there " )
+)
+
+that code above prints "hello there " but anything else can go here since it is ignored and
+only that first function is executed.
+The 2*2 is evaluated but is not displayed because it is not inside a (print) statement
+(print "general Kenobi\n") ; is ignored
+
+```
+
+Thus for the purposes of this user guide, assume that all below examples are being executed inside a `(seq` block.
+Since all code is a list whose contents are being evaluated, you
+could just as easily put all your code inside a **(list** block as
+well. **(list** and **(seq** are identical, except that
+**(seq** will return whatever the last item inside it evaluates to,
+whereas **(list** does not, making **seq** more efficient for the purposes of evaluating code sequentially.
+
+	(print (list 1 2 3)) ; prints the list itself, i.e., (list 1 2 3)
+
+	(print (seq 1 2 3)) ; prints 3, since that's the last item in the list to be evaluated
+
+Since both are lists, if you were to **(get** an item from a
+specific index from either of them, you'd get the same result:
+
+	(print
+		(get (list 1 2 3) 1) ; get the item at index 1, which is a 2
+
+		(get (seq 1 2 3) 1) ;same
+	)
+
+
+Data Structures:
+---------------
+
+The data types of Amalgam consist of immediate values, which are strings and numbers; lists, which are ordered sets of elements with an opcode associated with them (which may be list); and assocs, which are associative arrays of key-value pairs. Code is just a list with a different opcode.
+
+Indices of lists are 0-based, and keys of an assoc are referred to as
+the indices of an assoc. This concept unifies assocs and lists so that
+you may think of lists as assocs where the index is the 'key' for each
+value in a list. 
The order of items in an assoc is never guaranteed.
+
+	(declare (assoc
+
+		indices_of_my_list (indices (list 10 20 30 40)) ;returns (list 0 1 2 3)
+		indices_of_my_assoc (indices (assoc "x" 2 "y" 3 "z" 4)) ;returns just the 'keys', (list "x" "z" "y"), NOTE: order of indices in an assoc is not guaranteed
+		values_of_my_list (values (list 10 20 30 40)) ;returns the exact same list (list 10 20 30 40) since the values of a list are the list itself
+		values_of_my_assoc (values (assoc "x" 2 "y" 3 "z" 4)) ;returns just the values (list 4 3 2) NOTE: order of values in an assoc is not guaranteed
+
+	))
+
+Also note that `(assoc foo 3 bar 5)` is the same as `(assoc "foo" 3 "bar" 5)`; the quotes around
+the indices (keys) are *optional*. This means that **(assoc** uses the literal string value of the
+keys as provided to it. Thus if you have a variable named 'foo' and you intended the key to be the value
+of that variable instead of it being "foo", you need to use the opcode **(associate** instead.
+**(associate** evaluates the *keys* of an assoc and should be used anytime the *keys* are
+either variables or the output of some code. The values of an assoc are always evaluated.
+
+	(declare (assoc foo "my_key" bar 5)) ; create a 'variable' named "foo" that has the value of "my_key"
+	(print (assoc foo bar)) ;prints (assoc foo 5) - the key was not evaluated
+	(print (associate foo bar)) ;prints (assoc my_key 5) - the key was evaluated
+
+	;use (associate if the key is the result of the output of some method:
+	(print (associate (call GenerateUUID) "hello")) ;prints (assoc "UUID_KEY_GOES_HERE" "hello")
+
+
+Also note that once you declare a variable and it exists in the context,
+you cannot use **(declare** to overwrite it. Since variables are actually
+just keys of an assoc that are on the stack, and those keys already exist,
+if you want to change their values, you need to use the **(assign** opcode:
+
+	(assign (assoc x 4 y 8)) ; overwrites the values of x and y accordingly
+
+
+For example, after you create a variable called **my\_list** that has a
+list of letters:
+
+	(declare (assoc my_list (list "a" "b" "c")))
+
+if you want to now edit **my\_list** and append the letter "d" to it,
+and you simply do this:
+
+	(append my_list "d")
+
+nothing will happen because even though the code is evaluated, we didn't
+'do' anything with the evaluated outcome; we didn't store it into anything! 
To update what value **my\_list** stores, we have to do this:
+
+	(assign (assoc my_list (append my_list "d")))
+
+
+To make this easier, Amalgam has the **(accum** opcode that can be used as such:
+
+	(accum (assoc my_list "d"))
+
+More examples of basic list operations:
+
+	(seq
+
+		;declare a couple of lists of letters
+		(declare (assoc
+			kitty (list "A" "B" "C" "D" "E")
+			bunny (list "x" "y" "z")
+		))
+
+		;different types of list operations
+		(declare (assoc
+			first_in_kitty (first kitty) ; result is "A"
+
+			last_in_kitty (last kitty) ; result is "E"
+
+			;(trunc removes items from the end of a list
+			truncate_1_item_in_kitty (trunc kitty) ; result is (list "A" "B" "C" "D")
+
+			truncate_all_items_in_kitty_leaving_2 (trunc kitty 2) ; result (list "A" "B" )
+
+			truncate_2_items_in_kitty (trunc kitty -2) ; result (list "A" "B" "C" )
+
+
+			;(tail removes items from the front of a list
+			remove_1_item_from_front_of_kitty (tail kitty) ; result is (list "B" "C" "D" "E")
+
+			remove_all_from_front_of_kitty_leaving_2 (tail kitty 2) ; result is (list "D" "E")
+
+			remove_2_items_from_front_of_kitty (tail kitty -2) ; result is (list "C" "D" "E")
+
+			;(append is straightforward
+			kitty_and_bunny (append kitty bunny) ;result is (list "A" "B" "C" "D" "E" "x" "y" "z")
+
+			size_of_kitty (size kitty) ;result is 5
+
+			reverse_of_kitty (reverse kitty) ;result is (list "E" "D" "C" "B" "A")
+		))
+
+	)
+
+# Variables and Scope
+
+There are several ways to declare variables in Amalgam. One way is to use `(declare (assoc`.
+
+`(declare` creates the variable in the current context (current scope), but only if it does not already exist.
+
+> ;Amalgam
+> (declare (assoc h "hamster"))
+> (print h)
+> ;;outputs: hamster
+>
+> # Python equivalent:
+> h = "hamster"
+> print(h)
+
+Another way is `(let`, which makes the variable available *inside* the operator (local scope).
+
+> ;Amalgam
+> (let (assoc y "yamster"))
+> (print y)
+> ;;outputs: null
+>
+> //Java equivalent:
+> if(true) {
+>     String y = "yamster";
+> }
+> System.out.print(y);
+>
+>
+> ;Amalgam
+> (let
+>     (assoc y "yamster")
+>     (print y)
+> )
+> ;;outputs: yamster
+>
+> //Java equivalent:
+> if(true) {
+>     String y = "yamster";
+>     System.out.print(y);
+> }
+
+**Important distinction**
+`(let (assoc` and `(declare (assoc` create and initialize variables;
+they do **not** overwrite already existing variables.
+Use `(assign` to set previously declared variables.
+
+> //javascript
+> var x = 5; //declare and set variable x to 5
+> x = ["a","b","c"]; //sets variable x to a list of letters instead
+>
+>
+> ;Amalgam
+> (declare (assoc x 5)) ;declare and set variable x to 5
+> (declare (assoc x (list "a" "b" "c"))) ;does nothing because x has already been declared
+> (assign (assoc x (list "a" "b" "c"))) ;sets variable x to a list of letters instead
+
+More examples with descriptions:
+
+
+	(seq
+		;a (declare (assoc will create an assoc of key -> value pairs where the values can be code itself.
+		;note: the declaration can be treated as though it's done in parallel, so you CANNOT use values in the same declare to
+		;      calculate subsequent values like so:
+		(declare (assoc x 3 y 2 foo (* x y)))
+		(print foo "\n") ;outputs 0 because foo has already been evaluated, and when it was, x and y were nulls
+	)
+
+	(seq
+		;if you want to use declared values to make new values, you have to chain the declare statements like so:
+		(declare (assoc x 3 y 2))
+		(declare (assoc foo (* x y))) ;the multiplication is evaluated right here so the result is stored in foo
+		(print foo) ;thus we get the expected result of 6 here
+	)
+
+
+	;if we want foo to be a function, we need to make sure the code isn't evaluated right away; to do that we wrap it in a 'lambda'
+	(seq
+		(declare (assoc x 3 y 2))
+		(declare (assoc foo (lambda (* x y)))) ;the multiplication is stored as the code itself, WITHOUT being evaluated
+
+		(print "foo: " foo "\n") ;thus this returns the unevaluated code for the multiplication that's stored into foo
+
+		;now that the code in foo is not evaluated, to evaluate it we can 'call' it:
+		(print "calling foo: "
+			(call foo) ; this calls (evaluates) foo, which uses the values of x and y in the scope and thus returns a 6
+			"\n"
+		)
+
+		;since the code in foo is not evaluated until we actually call it, we can pass in parameters for x and y and evaluate it with
+		;those parameters
+		(print "calling foo w/ params: "
+			(call foo (assoc x 4 y 8)) ;and now we have what most developers would consider a 'function' or 'method'
+			"\n"
+		)
+	)
+
+# Conditionals
+
+Amalgam's `(if` operator takes a series of condition / action pairs,
+followed by an *optional* default action.
+
+> //Groovy
+> x = 4
+> if(x < 3){
+>     print "x is tiny"
+> } else if(x == 3){
+>     print "x is exactly 3"
+> } else {
+>     print "x is huuge"
+> }
+>
+> ;Amalgam
+> (let
+>     (assoc x 4)
+>     (if (< x 3)
+>         (print "x is tiny")
+>         (= x 3) ;else if
+>         (print "x is exactly 3")
+>         ;else
+>         (print "x is huuge")
+>     )
+> )
+
+# Loops and Maps
+
+The Amalgam syntax is a little different from some other languages,
+but the overall idea is the same. However, functional programming is strongly recommended,
+as while loops containing an accum can be considerably slower and consume notably more memory.
+
+> //Java
+> // print values 0-9
+> for(int i = 0; i < 10; i++){
+>     System.out.print(i);
+> }
+>
+> ;Amalgam
+> ;This is a classic while loop, but not recommended as it has an accum within it
+> ; Loops:
+> (let
+>     (assoc i 0)
+>     (while (< i 10)
+>         (print i) ; do stuff here
+>         (accum (assoc i 1)) ;increment i by 1
+>     )
+> )
+>
+>
+> ;Maps:
+> ;Maps serve the same purpose as loops, but in a more functional way. Maps are also an easy and efficient way to iterate over a series of values that may not be a sequence
+> ;and are not guaranteed to execute in order (i.e., map may utilize parallel processing, especially if preceded by ||)
+> ; Print values from 0 to 9
+> (map
+>     (lambda
+>         (print (target_value)) ;run this on each item from the list
+>     )
+>     (range 0 9) ;generate a list from 0 through 9
+> )
+
+
+# Functions
+
+Functions in Amalgam *should* be implemented using labels to follow
+coding standards, but can be unlabeled variables as well. 
+ +> //Javascript +> function mul(x, y){ +> return x * y; +> } +> console.log( mul(4,2) ); +> //outputs: 8 +> +> ;Amalgam +> #mul (* x y) +> (print (call mul (assoc x 4 y 2))) +> ;outputs: 8 +> +> ;unlabeled function definition: +> (declare (assoc mul (lambda (* x y)))) + +Notes: + +`(lambda` means we're going to define a function (or any code) but we +are not going to evaluate it. In this example, it stores the +function/code itself into the variable. +We pass in parameters to a function as an `(assoc` with the variables as +the keys of that assoc. +`(call` evaluates/executes/runs the code inside the lambda. + +Functions with default parameters: + +> //Javascript +> //function to multiple all values in the list by each other and then to multiply them result by the specified factor +> //default my_list to be a list of 1 if it's not specified +> function multiplyValuesInList(my_list = [1], factor = 2) { +> return my_list.reduce((a, b) => a * b * factor); +> } +> +> console.log(multiplyValuesInList()); +> //outputs: 2 +> console.log(multiplyValuesInList([3,2],3)); +> //outputs: 18 +> +> ;Amalgam +> #multiplyValuesInList +> (declare +> (assoc +> my_list (list 1) ;specify the parameters and what the default values are +> factor 2 +> ) +> (* (apply "*" my_list) factor) +> ) +> +> (call multiplyValuesInList) +> ;;outputs: 2 +> (call multiplyValuesInList (assoc my_list (list 3 2) factor 3)) +> ;;outputs: 18 + +# Entities (Objects) + +Objects in Amalgam are called **entities**. +The base script being executed is an entity itself. + +To create 'child' contained entities, use `(create_entities`, to load +existing code as a contained entity use `(load_entity` instead. +Once there are contained (aka "child") entities, you can call their +functions via `(call_entity` or retrieve their data +via `(retrieve_from_entity`. + +The 'parent' container entity has full access to its full hierarchy of +contained entities (all its children entities and grand children, etc.) +Child entities, however only have access to their parent container +entity's labels that are marked with a **\#^**. + +> //Java +> public class Car { +> private String color = "white"; +> public void setColor(String c) { this.color = c; } +> public String getColor() { return this.color; } +> public void drive(int speed) { +> if(speed < 35) { +> System.out.println("slow " + color); +> } +> else { +> System.out.println("vroom " + color); +> } +> } +> } +> +> Car myCar = new Car(); //instantiate a car +> myCar.setColor("blue"); //set color +> myCar.drive(67); //drive fast +> +> +> ;;Amalgam +> ;creating a named entity will "instantiate" so that you can refer to it by name, in this example we name it "car" +> (create_entities "car" +> (lambda (null +> #color "white" +> +> #drive +> (declare +> (assoc speed 0) ;parameter, default to 0 +> (if (< speed 35) +> (print "slow " color "\n") +> ;else +> (print "vroom " color "\n") +> ) +> ) +> )) +> ) +> (assign_to_entities "car" (assoc color "blue")) ;set color +> (call_entity "car" "drive" (assoc speed 67)) ;drive fast +> +> +> ;...alternatively you could create an entity and assign it to a variable and then refer to it using the variable: +> (declare (assoc +> myCar +> (create_entities +> (null +> ;todo: copy-pasta code from the above (null +> ) +> ) +> )) +> (assign_to_entities myCar (assoc color "blue")) ;set color +> (call_entity myCar "drive" (assoc speed 67)) ;drive fast + +# Labels + +Entity attributes are denoted by "labels". 
To label an operator, just place a
+label in front of or above the referenced opcode. Labels are pretty much
+annotations and references to code and data; in object-oriented programming terms,
+they are used to denote methods and attributes, though an operator can have multiple labels.
+
+	#foo
+	#bar
+	5
+
+This will attach the labels **foo** and **bar** to the immediate number 5 as
+instantiated in the current location of the current entity,
+allowing you to use either of them in code as variables, checked after all other lexical scopes:
+
+	(print foo " toes and " bar " fingers\n")
+
+or you can label more complex code:
+
+	(seq
+		(null
+			#code_block
+			(list "a" "b" #third_value "c")
+		)
+
+		(print (get code_block 1)) ;gets the value at index=1, prints b
+
+		(print third_value) ;prints c since that's what this label is referencing
+
+	)
+
+The explanation for the code above is that **\#code\_block** is in
+front of the list, therefore it references the entire list, whereas
+**\#third\_value** is in front of the literal string "c" so it
+references just that value.
+According to the style guide, you should put labels on their own lines above the code you want to attach them to.
+In the above example, the label **\#third\_value** is not on its own line, but it still references whatever code is immediately after it.
+Additionally, **\#code\_block** is placed in a null so that it won't be executed by the seq.
+
+
+There are 4 types of labels in Amalgam:
+ - `#regular_label` labels are accessible by this entity and all 'parent'
+container entities, but not 'child' contained entities
+ - `#^public_label` labels are accessible by everyone, contained and container
+entities
+ - `#!private_label` labels are accessible only by this entity, not contained
+and not container entities
+ - `##inaccessible_label` multiple-\# means the label, whether it's regular,
+public, or private, is inaccessible by anyone until it is evaluated
+
+
+# More Advanced Operations
+
+### Apply
+
+The **(apply** opcode takes whatever code block you pass it, changes its opcode to whatever new opcode you specified, and then evaluates that block of code:
+
+usage: *(apply (lambda (<your\_new\_opcode>))
+<your\_code\_you\_want\_the\_new\_opcode\_applied\_to>)*
+
+	(seq
+		(declare (assoc hamsters (list 2 1 3 4 5)))
+
+		;result is 15 - this will apply the function (+ to the list of items in hamsters
+		(declare (assoc summed_up (apply (lambda (+)) hamsters) ))
+	)
+
+Essentially what happens is the code **(list** 2 1 3 4 5) *becomes*
+**(+** 2 1 3 4 5), i.e., the **(list** opcode is swapped out for a
+**(+** and the code block is then evaluated.
+
+You may use shorthand for lambda inside **(apply** by using double
+quotes, like so: **(apply "+" hamsters)**, though double quotes must be
+used if using apply as a 'cast' operation, to cast code to a type that
+doesn't have an opcode (i.e., **string**, **number**, or **symbol**).
+
+### Zip and Unzip
+
+Zip behaves like a zipper. It takes two lists and converts them into an assoc. Unzip is the same but in reverse: it returns a list of values that correspond to the specified list of keys. 
+
+usage: *(zip <list\_of\_keys> <list\_of\_values>)*
+usage: *(unzip <assoc> <list\_of\_keys>)*
+
+	(seq
+		(declare (assoc
+			my_keys (list "a" "b" "c")
+			my_vals (list 2 4 8)
+		))
+
+		(declare (assoc
+
+			;results in: (assoc "a" 2 "b" 4 "c" 8)
+			my_map (zip my_keys my_vals)
+
+			;if the values list is not provided, Amalgam defaults it to nulls, the result is: (assoc "a" (null) "b" (null) "c" (null))
+			just_keys_no_values_map (zip my_keys)
+		))
+
+		;now that we created an assoc, we can try to unzip it:
+		(declare (assoc
+
+			;result is (list 4 8) because those were the values for keys "b" and "c", which is what we passed into the unzip
+			couple_of_values (unzip my_map (list "b" "c"))
+		))
+	)
+
+You can use unzip on lists as well; for lists, the 'keys' are their
+indices: you can think of assocs as key→value pairs with custom defined
+keys, and lists as key→value pairs where the keys are indices.
+
+Therefore if we want values from a list at indices 1 and 2, we can pass
+those indices into the unzip:
+
+	(declare (assoc
+
+		;result is (list "b" "c") because those are the values corresponding to the indices in that list
+		couple_of_list_values (unzip my_keys (list 1 2))
+
+	))
+
+### Get
+
+Get retrieves a value from a list or an assoc.
+
+usage: *(get <code> <index>)*
+
+Getting an individual value from a list or an assoc is basic - you just specify the index of the item you want:
+
+	(seq
+		(declare (assoc
+			numbers (list 10 20 30 40 50)
+			numbers_map (assoc "a" 10 "b" 20 "c" 30)
+		))
+
+		(declare (assoc
+
+			;returns 30 since that's the third value (at index 2)
+			third_value (get numbers 2)
+
+			;returns 20 since that's the value for key "b"
+			value_for_key_b (get numbers_map "b")
+		))
+	)
+
+
+Getting a value that is inside a nested structure requires you to
+specify the indices of each level of the nesting you want in order,
+using a list as the parameter:
+
+	(seq
+		(declare (assoc
+			numbers (list 10 20 (list "a" "b") 40 50)
+			numbers_map (assoc "a" 10 "b" 20 "c" (assoc "A" 1 "B" (list 2 4 8)))
+		))
+
+		(declare (assoc
+			;returns "a" - we specified that we want to look at the item that's at index 2, and inside that item we want to look at what's at index 0
+			first_value_from_third_value (get numbers (list 2 0))
+
+			;returns 4 - look at the value for key "c", inside that look at the item for key "B", inside that look at the item at index 1
+			my_nested_value (get numbers_map (list "c" "B" 1))
+		))
+	)
+
+
+
+### Set
+
+Set is the same as **(get** above, except you pass in one more parameter specifying what to set the value to, instead of returning it.
+
+usage: *(set <code> <index> <new\_code>)*
+
+	(seq
+		(declare (assoc
+			numbers (list 10 20 30 40 50)
+			numbers_map (assoc "a" 10 "b" 20 "c" 30)
+		))
+
+		(declare (assoc
+			;returns (list 10 20 33 40 50) since we've set the value that's at index 2
+			changed_third_value (set numbers 2 33)
+
+			;returns (assoc "a" 10 "b" 22 "c" 30) since we've set the value for key "b"
+			changed_value_for_key_b_map (set numbers_map "b" 22)
+		))
+	)
+
+Setting values for nested structures also works the same way: you
+specify a list of how to 'walk' into the nested structure.
+
+
+### Sort
+
+Sort allows users to write their own comparators by using **(target_value 1)** and **(target_value)** (often referred to as 'a' and 'b' in other languages) as it processes the list. Details on the **(target_value)** opcode and its scope offset parameter can be found farther below. 
+
+	(seq
+		(declare (assoc hamsters (list 2 1 3 4 5)))
+		(declare (assoc
+			sorted (sort hamsters) ; result is (list 1 2 3 4 5)
+
+			;as the items in the list are being iterated over, they are set to the built-in opcodes for
+			;current value: (target_value) and previous value (target_value 1)
+			;and if you specify the comparison method, it'll use the output of that comparison to select the order
+			reverse_sorted (sort (lambda (< (target_value) (target_value 1))) hamsters) ; result is (list 5 4 3 2 1)
+		))
+	)
+
+### Reduce
+
+Reduce collapses all items into one using a custom operation.
+
+usage: *(reduce (lambda <your\_custom\_operation>) <data>)*
+
+During your custom operation, you can use the two built-in opcodes
+**(target_value)** and **(target_value 1)**. **(target_value 1)** is the reduced result during the iteration, while
+**(target_value)** is the current item being iterated on.
+
+For example, if you have a list of numbers that you want to add up, **(target_value 1)**
+will keep a running total, while **(target_value)** will be set to the value of the
+next item to be added:
+
+	(reduce (lambda (+ (target_value 1) (target_value))) (list 1 2 3 4))
+
+The internal iteration would be as follows:
+
+```
+(target_value 1)=1 (target_value)=2
+
+(target_value 1)=3 (target_value)=3
+
+(target_value 1)=6 (target_value)=4
+
+result: 10
+```
+
+### Filter and Map
+
+Filter returns the elements that evaluate to true via a custom filtering function, whereas map transforms each element via the custom function.
+
+usage: *(map <custom\_function> <data>)*
+usage: *(filter <custom\_filter\_function> <data>)*
+
+**(target_value)** is set to the value of the current item, while **(target_index)** is set
+to the index "key" of the current item.
+
+	(seq
+		(declare (assoc
+			numbers (list 10 20 30 40 50)
+			numbers_map (assoc "a" 10 "b" 20 "c" 30)
+		))
+
+		(declare (assoc
+			;iterate over the numbers list and add the index of the number to the number, result: (list 10 21 32 43 54)
+			modified_numbers
+			(map
+				(lambda
+					(+ (target_value) (target_index))
+				)
+				numbers
+			)
+
+			;iterate over numbers_map, and for each value, convert it into a list of numbers from 1 to that value
+			;result will be: (assoc "a" (list 1 2 3 4 5 6 7 8 9 10) "b" (list 1 2 ...etc... 19 20) "c" (list 1 2 ...etc... 29 30))
+			modified_numbers_map
+			(map
+				(lambda
+					;output a list of numbers from 1 to whatever value is stored in (target_value)
+					(range 1 (target_value))
+				)
+				numbers_map
+			)
+
+			;filter leaves items on the list that match the specified condition and removes all others, results in (list 30 40 50)
+			numbers_over_25
+			(filter (lambda (> (target_value) 25)) numbers)
+
+			;leave only those values whose (target_index) % 2 == 1, results in (list 20 40)
+			odd_indices_only
+			(filter (lambda (= (mod (target_index) 2) 1)) numbers)
+
+		))
+	)
+
+Regarding **(target_value)** and **(target_index)**: if you have
+nested map statements, they will refer to the items being
+iterated by the immediately scoped **(map** statement they are in. 
+
+	(seq
+		(declare (assoc matrix (list (list 1 2 3) (list 10 20 30)) ))
+
+		;iterate over each row in the matrix and print out the values in each row, separating each row with a new line
+		;expected output:
+		;1 2 3
+		;10 20 30
+		(map
+			(lambda (seq
+				;iterate over each value in the row of data
+				(map
+					(lambda
+						;here (target_value) refers to each value in the row, print it with a space afterward
+						(print (target_value) " ")
+					)
+
+					;here (target_value) refers to each item, a 'row' in matrix, which itself is a list of values
+					(target_value)
+				)
+
+				;print a new line after all the values in the row have been printed
+				(print "\n")
+			))
+			matrix
+		)
+	)
+
+
+**(target_value <offset>)** and **(target_index <offset>)** are used to access the currently iterated value farther up the stack,
+where the offset parameter is how many 'layers' to go up. For example:
+
+	(map
+		(lambda
+			(map
+				(lambda
+					(print
+						(target_value) " " ; prints inner-most values of 10, 20, and 30
+						(target_index) " " ; prints inner-most indices of 0, 1, and 2
+						(target_value 1) " " ;prints the target_value from 1 level up the stack, a, b and c
+						(target_index 1) "\n" ; prints the target_index from 1 level up the stack A, B, and C
+					)
+				)
+				(list "10" "20" "30")
+			)
+		)
+		(assoc "A" "a" "B" "b" "C" "c") ; note: assocs aren't necessarily iterated in order since they are unordered hashmaps
+	)
+
+```
+outputs:
+10 0 b B
+20 1 b B
+30 2 b B
+10 0 c C
+20 1 c C
+30 2 c C
+10 0 a A
+20 1 a A
+30 2 a A
+```
+
+Important note regarding **(target_value)** and **(target_index)** and how their offset parameter works:
+The following opcodes each have their own scope stack (in the Amalgam reference document, you'll see that these all have '_Creates one or more new entries on target stack_' under their description):
+
+	assoc
+	filter
+	list
+	map
+	reduce
+	replace
+	rewrite
+	sort
+	weave
+	zip
+
+	; Example
+	(map
+		(lambda (let
+			(assoc
+				; store target_value into x_val, must provide stack-offset of 1 because this is inside an (assoc) that has its own scope
+				x_val (target_value 1)
+
+				; store it wrapped in a list, must provide stack-offset of 2 because it's inside a (list) that has its own scope
+				; and it's inside the (assoc), therefore the value is 2 because it's nested two levels deep
+				x_list (list (target_value 2))
+			)
+
+			(print
+				x_val " = " (target_value) "\n" ; in the original scope of the (map) statement, not inside an (assoc)
+				x_list " = " (list (target_value 1)) "\n" ; wrapped in a list, must provide stack-offset of 1 because (list) has its own scope
+			)
+		))
+		(list 1 2 3)
+	)
+
+
+### Types
+
+The following are different variations of null or nonexistent:
+**(null)** - missing value (or code)
+**.nan** - missing number value (Not A Number)
+**.nas** - missing string value (Not A String)
+
+To numerify something you can use the `(+` operator:
+`(+ "5")` converts the string 5 to the number 5
+
+Numerifying a null in any way or dividing 0 by 0 results in a `.nan`:
+`(+ (null))` = `.nan`
+`(* 5 (null))` = `.nan`
+`(/ 0 0)` = `.nan`
+
+To stringify something you can use the `(concat` operator:
+`(concat 5)` converts the number 5 into the string 5
+
+Stringifying a null in any way results in a `.nas`:
+`(concat (null))` = `.nas`
+
+`(zip (list "a" (null)) (list 1 2))` = `(assoc "a" 1 .nas 2)`
+
+To check the type of something you can use either **(get_type** to return the type itself
+or **(get_type_string** to return the readable string version of the type:
+
+`(get_type "hello")` = `""`
+`(get_type 5)` = `0`
+`(get_type (list 1 2 3))` = `(list)`
+
+`(get_type_string "hello")` = `"string"`
+`(get_type_string 5)` = `"number"`
+`(get_type_string (list 1 2 3))` = `"list"`
+
+### Entities
+"Objects" in Amalgam are called **entities**.
+The base script being executed is an entity itself.
+Entities are named instances of Amalgam code.
+The 'root' of an entity is its top-most function; thus a script calling `(retrieve_entity_root)` on itself will print its own code out.
+
+Entities can contain other entities, aka *child* entities. The 'parent' container entity has full access to its full hierarchy of
+contained entities (all its child entities and grandchildren, etc.). Child entities, however, only have access to their parent container
+entity's labels that are marked with a `#^`.
+
+To create contained entities, use `(create_entities`, optionally specifying a name for each one. If you don't specify one, the system will
+create one for you in the format of a 10-digit number preceded by a _, e.g., "_2364810274".
+
+
+```
+(create_entities (lambda (null)) )
+(create_entities "bob" (lambda (null)) )
+```
+
+The above code creates one empty entity with a system-generated name and one named *bob*.
+To see the full list of contained entities, use `(contained_entities)`.
+
+You can create entities contained by other entities by specifying the hierarchy traversal path. For example, to create an entity contained by *bob* named *food*, we would do this:
+`(create_entities (list "bob" "food") (lambda (null)) )`
+And to see all contained entities in *bob* we would do this:
+`(contained_entities "bob")` or `(contained_entities (list "bob"))`
+
+Since methods/functions are referenced via labels (see User Guide for details), we can call functions on child entities like so:
+
+```
+;create an entity named 'foo' with a method and a static value
+(create_entities "foo" (lambda
+	(null
+		##add_five (+ x 5) ;create a method that adds 5 to x
+		##static_value "STATIC"
+	)
+))
+(print
+	;execute the method 'add_five' on entity 'foo' passing in 10 as the variable x
+	(call_entity "foo" "add_five" (assoc x 10))
+	" "
+	;retrieve the value stored in entity foo's label 'static_value'
+	(retrieve_from_entity "foo" "static_value")
+)
+```
+The above code should print out "15 STATIC".
+
+### Loading Amalgam Files
+
+You can load an existing file one of two ways:
+1) as a contained entity and then access the entity
+2) directly into your current file
+
+As an entity:
+`(load_entity "filename.amlg" "blah")`
+You now have a contained entity named *blah* that you can access via regular entity operations.
+
+Directly into your file:
+
+```
+#load_label (null)
+(direct_assign_to_entities (assoc load_label (load "filename.amlg")) )
+```
+
+The entire contents of your *filename.amlg* will be loaded into *load_label* and accessible directly in the rest of the script.
+If the file you are loading is a standalone script, you may execute it via `(call load_label)`. 
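+
+For instance, once loaded as an entity, *blah* can be used with the same entity operations shown in the Entities section. A minimal sketch (the label `run_report` and its parameter `x` are hypothetical here and assume *filename.amlg* defines them):
+
+```
+;load the file as a contained entity named "blah"
+(load_entity "filename.amlg" "blah")
+
+;call a label from that file, passing parameters in as an assoc
+;(assumes filename.amlg defines a #run_report label that uses a variable x)
+(print (call_entity "blah" "run_report" (assoc x 5)))
+```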
+
+### Small Examples
+
+```
+;convert nulls to 0, while leaving non-nulls as-is:
+(or my_value)
+
+;convert values to numbers:
+(+ my_value)
+
+;convert values to strings:
+(concat my_value)
+
+;get rid of dupes in a list:
+(indices (zip list_with_dupes))
+
+;collapse a list of lists into a unique list:
+(indices (zip (reduce (lambda (append (target_value) (target_value 1))) list_of_lists )))
+
+;count instances of a 'value' in a list:
+(size (filter (lambda (= (target_value) value)) your_list))
+
+;return values at specific indices for a list (slow):
+(filter (lambda (contains_value indices_list (target_index))) your_list)
+
+;return values at specific indices for a list (fast):
+(unzip your_list indices_list)
+
+;check if a list is all of one value, e.g., (null):
+(apply "=" (append your_list (list (null))))
+
+;set a nested key 'third' in myassoc that's three levels down:
+;python equivalent of myassoc['first']['second']['third'] = value_to_set
+(assign "myassoc" (list "first" "second" "third") value_to_set)
+
+
+;sort keys in an assoc by their corresponding values
+(sort
+	(lambda (> (get your_assoc (target_value)) (get your_assoc (target_value 1))))
+	(indices your_assoc)
+)
+
+;convert from a list of assocs to a flat assoc
+;i.e. (list (assoc ...) (assoc ...)) into (assoc ...)
+(reduce (lambda (append (target_value) (target_value 1))) list_of_assocs)
+or
+(apply "append" list_of_assocs)
+
+;check if small_list is a subset of big_list
+;by converting small_list into an assoc and removing all big_list's keys from it
+;there should be no keys remaining
+(= 0 (size (remove (zip small_list) big_list)) )
+
+;get all possible indices of a specific your_value in a list
+(filter
+	(lambda (= your_value (get your_list (target_value))))
+	(indices your_list)
+)
+
+;convert a list of lists into a flat list
+(apply "append" list)
+
+
+;dynamically set parallelism (concurrency) based on some logic
+(null
+	#mymap (map (lambda (print (target_value) "\n")) (range 1 30))
+)
+(if run_mt
+	(call (set_concurrency mymap (true)))
+	(call mymap)
+)
+```
+
+# Style Guide
+
+**General rules:**
+
+1. **tabs**, not spaces
+2. snake case (e.g., this\_is\_a\_long\_variable\_name) for local variables, camelCaseVariables for global variables
+3. spaces between operators and parentheses,
+good:`(/ (+ 3 4) 7)`
+ bad:`(/(+ 3 4)7)`
+4. no more than **2** opening parentheses per line *(see exception rule \#6)*
+5. no more than **2** closing parentheses per line *(see exception rule \#6)*
+6. if the statement is simple and short enough to fit entirely on one line, go ahead, but use your judgement for readability
+7. closing parens should match opening parens in terms of indentation for any first operator on a line
+8. one tab indent for new lines continuing a statement
+9. no double indents to adhere to rule #7
+10. since labels are treated as functions and attributes, a label that is a `(declare (assoc` statement treats that first `(assoc` like input parameters, so try to avoid declaring variables in that initial statement that are not parameters to the function
+11. all comments should go on their own lines, **above** the code they are commenting
+12. 
append `_map` (or `_set`) to variable names that will only ever store assocs + +> ;bad: >2 opening parens and closing statement paren doesn't line up with opening paren - breaks rules #4 and #7 +> (declare (assoc test (/ +> (+ 3 4) 7) +> +> +> ;while this indentation is fine, it's better to avoid single line indents like this since it's unnecessary +> (declare +> (assoc test (/ (+ 3 4) 7)) ;this is a poorly placed comment, it should be on its own line per rule #11 +> ) +> +> ;good example, all one line since the statement is simple per rule #6 +> (declare (assoc test (/ (+ 3 4) 7) )) +> +> +> ;good example of single indent because both pairs of parens open and close +> ;at the same indentation amount, per rules #4,#5 and #8 +> (declare (assoc +> test (/ (+ 3 4) 7) +> things (list "of" "stuff") +> )) +> +> ;old styling, acceptable but should be changed per rule #9. +> ;avoid double indents. (assoc should be on its own line below, see next example. +> (declare (assoc +> test (/ (+ 3 4) 7) +> things (list "of" "stuff") +> stuff_map (assoc "key" "value") +> ) +> ) +> +> ;good example of how the open parenthesis for declare and assoc match indentations per rule #7 +> (declare +> (assoc +> test (/ (+ 3 4) 7) +> test2 +> ;this comment is above the code it's for, and it's fine to one-line simple calls that only take a few parameters +> (call some_function (assoc param1 val1)) +> test3 +> (call another_function (assoc +> param1 val1 +> param2 val2 +> )) +> ) +> ) +> diff --git a/Amalgam.sln b/Amalgam.sln new file mode 100644 index 00000000..7dcced9e --- /dev/null +++ b/Amalgam.sln @@ -0,0 +1,55 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.2.32519.379 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Amalgam", "src\Amalgam\Amalgam.vcxproj", "{640515C4-B87F-4210-A603-F091804FAE5A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + MT_Debug_DLL|x64 = MT_Debug_DLL|x64 + MT_Debug_EXE|x64 = MT_Debug_EXE|x64 + MT_Release_DLL|x64 = MT_Release_DLL|x64 + MT_Release_EXE|x64 = MT_Release_EXE|x64 + OMP_Release_DLL|x64 = OMP_Release_DLL|x64 + OMP_Release_EXE|x64 = OMP_Release_EXE|x64 + ST_Debug_DLL|x64 = ST_Debug_DLL|x64 + ST_Debug_EXE|x64 = ST_Debug_EXE|x64 + ST_Release_DLL|x64 = ST_Release_DLL|x64 + ST_Release_EXE|x64 = ST_Release_EXE|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Debug_DLL|x64.ActiveCfg = MT_Debug_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Debug_DLL|x64.Build.0 = MT_Debug_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Debug_EXE|x64.ActiveCfg = MT_Debug_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Debug_EXE|x64.Build.0 = MT_Debug_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Release_DLL|x64.ActiveCfg = MT_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Release_DLL|x64.Build.0 = MT_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Release_EXE|x64.ActiveCfg = MT_Release_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.MT_Release_EXE|x64.Build.0 = MT_Release_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.OMP_Release_DLL|x64.ActiveCfg = OMP_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.OMP_Release_DLL|x64.Build.0 = OMP_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.OMP_Release_EXE|x64.ActiveCfg = OMP_Release_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.OMP_Release_EXE|x64.Build.0 = 
OMP_Release_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Debug_DLL|x64.ActiveCfg = ST_Debug_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Debug_DLL|x64.Build.0 = ST_Debug_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Debug_EXE|x64.ActiveCfg = ST_Debug_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Debug_EXE|x64.Build.0 = ST_Debug_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Release_DLL|x64.ActiveCfg = ST_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Release_DLL|x64.Build.0 = ST_Release_DLL|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Release_EXE|x64.ActiveCfg = ST_Release_EXE|x64 + {640515C4-B87F-4210-A603-F091804FAE5A}.ST_Release_EXE|x64.Build.0 = ST_Release_EXE|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9ACB0716-90FD-4E04-B84E-B339407211CF} + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection + GlobalSection(Performance) = preSolution + HasPerformanceSessions = true + EndGlobalSection +EndGlobal diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..9ce2a748 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,287 @@ +# +# Amalgam Language Interpreter - CMake build +# + +cmake_minimum_required(VERSION 3.26) +project(amalgam LANGUAGES CXX DESCRIPTION "Amalgam Language Interpreter") + +# Options: +set(USE_OBJECT_LIBS ON CACHE BOOL "Build using object libs") +set(TRY_GIT_TAG_FOR_UNKNOWN_VERSION ON CACHE BOOL + "If env var AMALGAM_BUILD_VERSION not found, try to use latest git tag for version") + +# Project CMake modules +list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/build/cmake") +include(global_settings) +include(version) +include(global_compiler_flags) + +# Print useful info from global settings/flags/env +message(STATUS "Amalgam version (orig) : '${AMALGAM_VERSION_ORIG}'") +message(STATUS "Amalgam version (clean) : '${AMALGAM_VERSION}'") +message(STATUS "Amalgam version base : '${AMALGAM_VERSION_BASE}'") +message(STATUS "Amalgam version prelease : '${AMALGAM_VERSION_PRERELEASE}'") +message(STATUS "Amalgam version metadata : '${AMALGAM_VERSION_METADATA}'") +message(STATUS "Amalgam version full : '${AMALGAM_VERSION_FULL}'") +message(STATUS "Amalgam version full escaped : '${AMALGAM_VERSION_FULL_ESCAPED}'") +message(STATUS "Use object libs : ${USE_OBJECT_LIBS}") +message(STATUS "System name : ${CMAKE_SYSTEM_NAME}") +message(STATUS "OS : ${OS}") +message(STATUS "Target system version : ${CMAKE_SYSTEM_VERSION}") +if(IS_VISUALSTUDIO) +message(STATUS "Target system version - VS : ${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION}") +endif() +message(STATUS "Target system processor : ${CMAKE_SYSTEM_PROCESSOR}") +message(STATUS "Architecture : ${ARCH}") +message(STATUS "Architecture version : ${ARCH_VERSION}") +if(IS_AMD64) +message(STATUS "Advanced intrinsics : ${ADVANCED_INTRINSICS_AMD64}") +endif() +message(STATUS "Host system processor : ${CMAKE_HOST_SYSTEM_PROCESSOR}") +message(STATUS "Number logical cores : ${NUMBER_OF_LOGICAL_CORES}") +message(STATUS "Number physical cores : ${NUMBER_OF_PHYSICAL_CORES}") +message(STATUS "Total virtual memory : ${TOTAL_VIRTUAL_MEMORY} MiB") +message(STATUS "Available virtual memory : ${AVAILABLE_VIRTUAL_MEMORY} MiB") +message(STATUS "Total physical memory : ${TOTAL_PHYSICAL_MEMORY} MiB") +message(STATUS "Available physical memory : ${AVAILABLE_PHYSICAL_MEMORY} MiB") +message(STATUS "Build 
type : '${CMAKE_BUILD_TYPE}'") +if(IS_VISUALSTUDIO) +message(STATUS "Build types (multi-configs) : '${CMAKE_CONFIGURATION_TYPES}'") +endif() +message(STATUS "CMake version : ${CMAKE_VERSION}") +message(STATUS "Generator : ${CMAKE_GENERATOR}") +message(STATUS "Compiler : ${CMAKE_CXX_COMPILER_ID}") +message(STATUS "Compiler version : ${CMAKE_CXX_COMPILER_VERSION}") +if(IS_MSVC) +message(STATUS "MSVC version : ${MSVC_VERSION}") +message(STATUS "MSVC toolset : ${MSVC_TOOLSET_VERSION}") +endif() +message(STATUS "Compiler C++ flags : ${CMAKE_CXX_FLAGS}") +message(STATUS "Compiler C++ debug flags : ${CMAKE_CXX_FLAGS_DEBUG}") +message(STATUS "Compiler C++ release flags : ${CMAKE_CXX_FLAGS_RELEASE}") +message(STATUS "Linker C++ app flags : ${CMAKE_EXE_LINKER_FLAGS}") +message(STATUS "Linker C++ app debug flags : ${CMAKE_EXE_LINKER_FLAGS_DEBUG}") +message(STATUS "Linker C++ app release flags : ${CMAKE_EXE_LINKER_FLAGS_RELEASE}") +message(STATUS "Linker C++ lib flags : ${CMAKE_SHARED_LINKER_FLAGS}") +message(STATUS "Linker C++ lib debug flags : ${CMAKE_SHARED_LINKER_FLAGS_DEBUG}") +message(STATUS "Linker C++ lib release flags : ${CMAKE_SHARED_LINKER_FLAGS_RELEASE}") + + +# +# Source files +# + +# Common source between apps & libs +# Note: do not change to globbing, that is a CMake anti-pattern +set(COMMON_SOURCE + src/3rd_party/date/date.h + src/3rd_party/date/ios.h + src/3rd_party/date/tz.cpp + src/3rd_party/date/tz.h + src/3rd_party/date/tz_private.h + src/3rd_party/fast_log/src/exp_table.h + src/3rd_party/murmurhash3/MurmurHash3.cpp + src/3rd_party/murmurhash3/MurmurHash3.h + src/3rd_party/simdjson/simdjson.cpp + src/3rd_party/simdjson/simdjson.h + src/3rd_party/rapidyaml/rapidyaml-0.5.0.hpp + src/3rd_party/skarupke_maps/bytell_hash_map.hpp + src/3rd_party/skarupke_maps/flat_hash_map.hpp + src/3rd_party/swiftdtoa/SwiftDtoa.cpp + src/3rd_party/swiftdtoa/SwiftDtoa.h + src/3rd_party/tweetnacl/tweetnacl.cpp + src/3rd_party/tweetnacl/tweetnacl.h + src/Amalgam/Amalgam.h + src/Amalgam/AmalgamAPI.cpp + src/Amalgam/AmalgamVersion.h + src/Amalgam/AssetManager.cpp + src/Amalgam/AssetManager.h + src/Amalgam/BinaryPacking.cpp + src/Amalgam/BinaryPacking.h + src/Amalgam/Conviction.h + src/Amalgam/ConvictionUtil.h + src/Amalgam/Cryptography.cpp + src/Amalgam/Cryptography.h + src/Amalgam/DateTimeFormat.cpp + src/Amalgam/DateTimeFormat.h + src/Amalgam/DistanceReferencePair.h + src/Amalgam/entity/Entity.cpp + src/Amalgam/entity/Entity.h + src/Amalgam/entity/EntityExternalInterface.cpp + src/Amalgam/entity/EntityExternalInterface.h + src/Amalgam/entity/EntityManipulation.cpp + src/Amalgam/entity/EntityManipulation.h + src/Amalgam/entity/EntityQueries.cpp + src/Amalgam/entity/EntityQueries.h + src/Amalgam/entity/EntityQueriesStatistics.h + src/Amalgam/entity/EntityQueryBuilder.h + src/Amalgam/entity/EntityQueryCaches.cpp + src/Amalgam/entity/EntityQueryCaches.h + src/Amalgam/entity/EntityQueryManager.h + src/Amalgam/entity/EntityWriteListener.cpp + src/Amalgam/entity/EntityWriteListener.h + src/Amalgam/evaluablenode/EvaluableNode.cpp + src/Amalgam/evaluablenode/EvaluableNode.h + src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp + src/Amalgam/evaluablenode/EvaluableNodeManagement.h + src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.cpp + src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.h + src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp + src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.h + src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp + 
src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h + src/Amalgam/FastEMath.h + src/Amalgam/FastMath.h + src/Amalgam/FilenameEscapeProcessor.h + src/Amalgam/GeneralizedDistance.h + src/Amalgam/HashMaps.h + src/Amalgam/importexport/FileSupportCSV.cpp + src/Amalgam/importexport/FileSupportCSV.h + src/Amalgam/importexport/FileSupportJSON.cpp + src/Amalgam/importexport/FileSupportJSON.h + src/Amalgam/importexport/FileSupportYAML.cpp + src/Amalgam/importexport/FileSupportYAML.h + src/Amalgam/IntegerSet.h + src/Amalgam/interpreter/Interpreter.cpp + src/Amalgam/interpreter/Interpreter.h + src/Amalgam/interpreter/InterpreterDebugger.cpp + src/Amalgam/interpreter/InterpreterOpcodesBase.cpp + src/Amalgam/interpreter/InterpreterOpcodesCodeMixing.cpp + src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp + src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp + src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp + src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp + src/Amalgam/interpreter/InterpreterOpcodesLogic.cpp + src/Amalgam/interpreter/InterpreterOpcodesMath.cpp + src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp + src/Amalgam/KnnCache.h + src/Amalgam/Merger.h + src/Amalgam/Opcodes.cpp + src/Amalgam/Opcodes.h + src/Amalgam/Parser.cpp + src/Amalgam/Parser.h + src/Amalgam/PartialSum.h + src/Amalgam/PerformanceProfiler.cpp + src/Amalgam/PerformanceProfiler.h + src/Amalgam/PlatformSpecific.cpp + src/Amalgam/PlatformSpecific.h + src/Amalgam/PrintListener.cpp + src/Amalgam/PrintListener.h + src/Amalgam/rand/RandomStream.cpp + src/Amalgam/rand/RandomStream.h + src/Amalgam/rand/WeightedDiscreteRandomStream.h + src/Amalgam/resource.h + src/Amalgam/SBFDSColumnData.h + src/Amalgam/SeparableBoxFilterDataStore.cpp + src/Amalgam/SeparableBoxFilterDataStore.h + src/Amalgam/string/StringInternPool.cpp + src/Amalgam/string/StringInternPool.h + src/Amalgam/string/StringManipulation.cpp + src/Amalgam/string/StringManipulation.h +) +set(COMMON_SOURCE_THREADS + ${COMMON_SOURCE} + src/Amalgam/Concurrency.cpp + src/Amalgam/Concurrency.h + src/Amalgam/ThreadPool.cpp + src/Amalgam/ThreadPool.h +) +set(RESOURCE_SOURCE) +if(IS_MSVC) + set(RESOURCE_SOURCE + docs/icon/amalgam.ico + src/Amalgam/resource.h + src/Amalgam/Resource.rc + ) +endif() +set(AMALGAM_APP_ONLY_SOURCE + src/Amalgam/AmalgamMain.cpp + src/Amalgam/AmalgamTrace.cpp + ${RESOURCE_SOURCE} +) +set(AMALGAM_LIB_ONLY_SOURCE ${RESOURCE_SOURCE}) +set(AMALGAM_ALL_SOURCE ${AMALGAM_APP_ONLY_SOURCE} ${COMMON_SOURCE_THREADS}) +source_group(TREE ${CMAKE_SOURCE_DIR} FILES ${AMALGAM_ALL_SOURCE}) + + +# +# Include dirs +# + +include_directories( + "${PROJECT_SOURCE_DIR}/src/3rd_party" + "${PROJECT_SOURCE_DIR}/src/Amalgam" + "${PROJECT_SOURCE_DIR}/src/Amalgam/entity" + "${PROJECT_SOURCE_DIR}/src/Amalgam/evaluablenode" + "${PROJECT_SOURCE_DIR}/src/Amalgam/importexport" + "${PROJECT_SOURCE_DIR}/src/Amalgam/interpreter" + "${PROJECT_SOURCE_DIR}/src/Amalgam/rand" + "${PROJECT_SOURCE_DIR}/src/Amalgam/string" +) + + +# +# Compiled targets +# + +include(custom_add_target) + +# Multi-threaded targets (amalgam-mt): +add_compiled_target(AUTO_NAME TYPE "objlib" USE_ADVANCED_ARCH_INTRINSICS USE_THREADS + SOURCE ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") +add_compiled_target(AUTO_NAME TYPE "app" USE_ADVANCED_ARCH_INTRINSICS USE_THREADS + SOURCE ${COMMON_SOURCE_THREADS} APP_ONLY_SOURCE ${AMALGAM_APP_ONLY_SOURCE}) +add_compiled_target(AUTO_NAME TYPE "sharedlib" USE_ADVANCED_ARCH_INTRINSICS USE_THREADS + SOURCE 
${AMALGAM_LIB_ONLY_SOURCE} ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") +set_property(DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT amalgam-mt-app) + +# Multi-threaded w/ no arch intrinsics targets (amalgam-mt-noavx): +# +# Note: On amd64, an app/lib combo is built without advanced intrinsics to be run on +# VMs/emulators/etc that do not have support for them. On arm64 archs, all targets don't +# use advanced intrinsics so this target is redundant. +if(IS_AMD64 AND NOT IS_WASM) + add_compiled_target(NAME "${PROJECT_NAME}-mt-${NO_ADVANCED_INTRINSICS_AMD64_SUFFIX}-objlib" TYPE "objlib" USE_THREADS + SOURCE ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") + add_compiled_target(NAME "${PROJECT_NAME}-mt-${NO_ADVANCED_INTRINSICS_AMD64_SUFFIX}-app" TYPE "app" USE_THREADS + SOURCE ${COMMON_SOURCE_THREADS} APP_ONLY_SOURCE ${AMALGAM_APP_ONLY_SOURCE} IDE_FOLDER "OtherBuildTargets") + add_compiled_target(NAME "${PROJECT_NAME}-mt-${NO_ADVANCED_INTRINSICS_AMD64_SUFFIX}-sharedlib" TYPE "sharedlib" USE_THREADS + SOURCE ${AMALGAM_LIB_ONLY_SOURCE} ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") +endif() + +# OpenMP targets (amalgam-omp): +add_compiled_target(AUTO_NAME TYPE "objlib" USE_ADVANCED_ARCH_INTRINSICS USE_OPENMP + SOURCE ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") +add_compiled_target(AUTO_NAME TYPE "app" USE_ADVANCED_ARCH_INTRINSICS USE_OPENMP + SOURCE ${COMMON_SOURCE_THREADS} APP_ONLY_SOURCE ${AMALGAM_APP_ONLY_SOURCE} IDE_FOLDER "OtherBuildTargets") +add_compiled_target(AUTO_NAME TYPE "sharedlib" USE_ADVANCED_ARCH_INTRINSICS USE_OPENMP + SOURCE ${AMALGAM_LIB_ONLY_SOURCE} ${COMMON_SOURCE_THREADS} IDE_FOLDER "OtherBuildTargets") + +# Single-threaded targets (amalgam-st): +add_compiled_target(AUTO_NAME TYPE "objlib" USE_ADVANCED_ARCH_INTRINSICS + SOURCE ${COMMON_SOURCE} IDE_FOLDER "OtherBuildTargets") +add_compiled_target(AUTO_NAME TYPE "app" USE_ADVANCED_ARCH_INTRINSICS + SOURCE ${COMMON_SOURCE} APP_ONLY_SOURCE ${AMALGAM_APP_ONLY_SOURCE}) +add_compiled_target(AUTO_NAME TYPE "sharedlib" USE_ADVANCED_ARCH_INTRINSICS + SOURCE ${AMALGAM_LIB_ONLY_SOURCE} ${COMMON_SOURCE} IDE_FOLDER "OtherBuildTargets") + +# Single-threaded PGC targets (amalgam-st-pgc): +# Note: only run on amd64 linux since it is for test verification only, hence NO_INSTALL +if(IS_LINUX AND IS_AMD64) + add_compiled_target(AUTO_NAME TYPE "objlib" USE_ADVANCED_ARCH_INTRINSICS USE_PGC NO_INSTALL + SOURCE ${COMMON_SOURCE} IDE_FOLDER "OtherBuildTargets") + add_compiled_target(AUTO_NAME TYPE "app" USE_ADVANCED_ARCH_INTRINSICS USE_PGC NO_INSTALL + SOURCE ${COMMON_SOURCE} APP_ONLY_SOURCE ${AMALGAM_APP_ONLY_SOURCE} IDE_FOLDER "OtherBuildTargets") + add_compiled_target(AUTO_NAME TYPE "sharedlib" USE_ADVANCED_ARCH_INTRINSICS USE_PGC NO_INSTALL + SOURCE ${AMALGAM_LIB_ONLY_SOURCE} ${COMMON_SOURCE} IDE_FOLDER "OtherBuildTargets") +endif() + + +# +# Additional artifacts/test/etc +# + +include(create_no_suffix_artifacts) +include(create_tests) +include(create_package) +include(create_static_targets) diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 00000000..1f1834f9 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,489 @@ +{ + "version": 5, + "cmakeMinimumRequired": { + "major": 3, + "minor": 26 + }, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "description": "Base preset (http://aka.ms/cmakepresetsvs)", + "generator": "Ninja", + "binaryDir": "${sourceDir}/out/build/${presetName}", + "installDir": "${sourceDir}/out/install/${presetName}", + 
"cacheVariables": { + "CMAKE_CXX_STANDARD": "17", + "CMAKE_CXX_STANDARD_REQUIRED": "YES", + "CMAKE_CXX_EXTENSIONS": "OFF", + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" + } + }, + + { + "name": "windows", + "hidden": true, + "description": "Sets windows os" + }, + { + "name": "linux", + "hidden": true, + "description": "Sets linux os" + }, + { + "name": "macos", + "hidden": true, + "description": "Sets macos os", + "cacheVariables": { + "CMAKE_OSX_DEPLOYMENT_TARGET": "11" + } + }, + + { + "name": "amd64", + "hidden": true, + "description": "Sets amd64 arch", + "architecture": { + "value": "x64", + "strategy": "external" + } + }, + { + "name": "arm64", + "hidden": true, + "description": "Sets arm64 arch", + "cacheVariables": { + "ARCH": "arm64" + }, + "architecture": { + "value": "arm64", + "strategy": "external" + } + }, + { + "name": "arm64_8a", + "hidden": true, + "description": "Sets arm64_8a arch", + "cacheVariables": { + "ARCH": "arm64_8a" + }, + "architecture": { + "value": "arm64", + "strategy": "external" + } + }, + { + "name": "wasm64", + "hidden": true, + "description": "Sets wasm64 arch", + "cacheVariables": { + "ARCH": "wasm64" + }, + "architecture": { + "value": "amd64", + "strategy": "external" + } + }, + + { + "name": "debug", + "hidden": true, + "description": "Sets debug build type", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "debug" + } + }, + { + "name": "release", + "hidden": true, + "description": "Sets release build type", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "release" + } + }, + + { + "name": "msvc", + "hidden": true, + "description": "Sets msvc compiler", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "cl.exe" + }, + "toolset": { + "value": "host=x64", + "strategy": "external" + } + }, + { + "name": "clang", + "hidden": true, + "description": "Sets clang compiler", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "clang++" + } + }, + { + "name": "gcc", + "hidden": true, + "description": "Sets gcc compiler", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "g++-10" + } + }, + { + "name": "gcc-arm", + "hidden": true, + "description": "Sets gcc-arm compiler", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "aarch64-linux-gnu-g++-10" + } + }, + { + "name": "emcc", + "hidden": true, + "description": "Sets emcc wasm64 compiler", + "cacheVariables": { + "CMAKE_CXX_COMPILER": "em++" + } + }, + + { + "name": "macos-amd64", + "hidden": true, + "description": "Sets macos-amd64 deploy target", + "inherits": [ "macos" ], + "cacheVariables": { + "CMAKE_OSX_ARCHITECTURES": "x86_64" + } + }, + { + "name": "macos-arm64", + "hidden": true, + "description": "Sets macos-arm64 deploy target", + "inherits": [ "macos" ], + "cacheVariables": { + "CMAKE_OSX_ARCHITECTURES": "arm64" + } + }, + + { + "name": "amd64-debug-windows", + "description": "msvc for amd64 (debug) for windows", + "inherits": [ "base", "windows", "amd64", "debug", "msvc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } + }, + { + "name": "amd64-release-windows", + "description": "msvc for amd64 (release) for windows", + "inherits": [ "base", "windows", "amd64", "release", "msvc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } + }, + + { + "name": "amd64-windows-vs", + "description": "msvc for amd64 for windows for Visual Studio sln", + "inherits": [ "base", "windows", "amd64", "msvc" ], + "generator": "Visual Studio 17 2022", + "cacheVariables": { + "CMAKE_CONFIGURATION_TYPES": "debug;release" + }, + "condition": { + "type": "equals", + 
"lhs": "${hostSystemName}", + "rhs": "Windows" + } + }, + + { + "name": "amd64-debug-linux", + "description": "gcc for amd64 (debug) for linux", + "inherits": [ "base", "linux", "amd64", "debug", "gcc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "amd64-release-linux", + "description": "gcc for amd64 (release) for linux", + "inherits": [ "base", "linux", "amd64", "release", "gcc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "arm64-debug-linux", + "description": "gcc for arm64 (debug) for linux", + "inherits": [ "base", "linux", "arm64", "debug", "gcc-arm" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "arm64-release-linux", + "description": "gcc for arm64 (release) for linux", + "inherits": [ "base", "linux", "arm64", "release", "gcc-arm" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "arm64_8a-debug-linux", + "description": "gcc for arm64_8a (debug) for linux", + "inherits": [ "base", "linux", "arm64_8a", "debug", "gcc-arm" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "arm64_8a-release-linux", + "description": "gcc for arm64_8a (release) for linux", + "inherits": [ "base", "linux", "arm64_8a", "release", "gcc-arm" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "wasm64-debug-linux", + "description": "emcc for wasm64 (debug) for linux", + "inherits": [ "base", "linux", "wasm64", "debug", "emcc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + { + "name": "wasm64-release-linux", + "description": "emcc for wasm64 (release) for linux", + "inherits": [ "base", "linux", "wasm64", "release", "emcc" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Linux" + } + }, + + { + "name": "amd64-debug-macos", + "description": "clang for amd64 (debug) for macos", + "inherits": [ "base", "macos-amd64", "amd64", "debug", "clang" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Darwin" + } + }, + { + "name": "amd64-release-macos", + "description": "clang for amd64 (release) for macos", + "inherits": [ "base", "macos-amd64", "amd64", "release", "clang" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Darwin" + } + }, + + { + "name": "arm64-debug-macos", + "description": "clang for arm64 (debug) for macos", + "inherits": [ "base", "macos-arm64", "arm64", "debug", "clang" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Darwin" + } + }, + { + "name": "arm64-release-macos", + "description": "clang for arm64 (release) for macos", + "inherits": [ "base", "macos-arm64", "arm64", "release", "clang" ], + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Darwin" + } + } + ], + "buildPresets": [ + { + "name": "amd64-debug-windows", + "configurePreset": "amd64-debug-windows", + "description": "windows amd64 debug build" + }, + { + "name": "amd64-release-windows", + "configurePreset": "amd64-release-windows", + "description": "windows amd64 release build" + }, + { + "name": "amd64-debug-linux", + "configurePreset": "amd64-debug-linux", + "description": "linux amd64 debug build" + }, + { + "name": "amd64-release-linux", + "configurePreset": 
"amd64-release-linux", + "description": "linux amd64 release build" + }, + { + "name": "arm64-debug-linux", + "configurePreset": "arm64-debug-linux", + "description": "linux arm64 debug build" + }, + { + "name": "arm64-release-linux", + "configurePreset": "arm64-release-linux", + "description": "linux arm64 release build" + }, + { + "name": "arm64_8a-release-linux", + "configurePreset": "arm64_8a-release-linux", + "description": "linux arm64_8a release build" + }, + { + "name": "wasm64-debug-linux", + "configurePreset": "wasm64-debug-linux", + "description": "linux wasm64 debug build" + }, + { + "name": "wasm64-release-linux", + "configurePreset": "wasm64-release-linux", + "description": "linux wasm64 release build" + }, + { + "name": "amd64-debug-macos", + "configurePreset": "amd64-debug-macos", + "description": "macos amd64 debug build" + }, + { + "name": "amd64-release-macos", + "configurePreset": "amd64-release-macos", + "description": "macos amd64 release build" + }, + { + "name": "arm64-debug-macos", + "configurePreset": "arm64-debug-macos", + "description": "macos arm64 debug build" + }, + { + "name": "arm64-release-macos", + "configurePreset": "arm64-release-macos", + "description": "macos arm64 release build" + } + ], + "testPresets": [ + { + "name": "base", + "description": "Enable output on failure", + "hidden": true, + "output": { + "outputOnFailure": true + } + }, + { + "name": "amd64-release-windows", + "description": "windows amd64 release tests", + "inherits": "base", + "configurePreset": "amd64-release-windows" + }, + { + "name": "amd64-debug-windows", + "description": "windows amd64 debug tests", + "inherits": "base", + "configurePreset": "amd64-debug-windows" + }, + { + "name": "amd64-release-linux", + "description": "linux amd64 release tests", + "inherits": "base", + "configurePreset": "amd64-release-linux" + }, + { + "name": "amd64-debug-linux", + "description": "linux amd64 debug tests", + "inherits": "base", + "configurePreset": "amd64-debug-linux" + }, + { + "name": "arm64-release-linux", + "description": "linux arm64 release tests", + "inherits": "base", + "configurePreset": "arm64-release-linux" + }, + { + "name": "arm64-debug-linux", + "description": "linux arm64 debug tests", + "inherits": "base", + "configurePreset": "arm64-debug-linux" + }, + { + "name": "arm64_8a-release-linux", + "description": "linux arm64_8a release tests", + "inherits": "base", + "configurePreset": "arm64_8a-release-linux" + }, + { + "name": "arm64_8a-debug-linux", + "description": "linux arm64_8a debug tests", + "inherits": "base", + "configurePreset": "arm64_8a-debug-linux" + }, + { + "name": "wasm64-release-linux", + "description": "linux wasm64 release tests", + "inherits": "base", + "configurePreset": "wasm64-release-linux" + }, + { + "name": "wasm64-debug-linux", + "description": "linux wasm64 debug tests", + "inherits": "base", + "configurePreset": "wasm64-debug-linux" + }, + { + "name": "amd64-release-macos", + "description": "macos amd64 release tests", + "inherits": "base", + "configurePreset": "amd64-release-macos" + }, + { + "name": "amd64-debug-macos", + "description": "macos amd64 debug tests", + "inherits": "base", + "configurePreset": "amd64-debug-macos" + }, + { + "name": "arm64-release-macos", + "description": "macos arm64 release tests", + "inherits": "base", + "configurePreset": "arm64-release-macos" + }, + { + "name": "arm64-debug-macos", + "description": "macos arm64 debug tests", + "inherits": "base", + "configurePreset": "arm64-debug-macos" + } + ] +} \ No newline 
at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..d096502a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,115 @@ +# Contributing + +This Howso™ open-source project only accepts code contributions from individuals and organizations that have signed a contributor license agreement. For more information on contributing and for links to the individual and corporate CLAs, please visit: https://www.howso.com/cla + +# General Guidelines +In general, there are a few principles to keep in mind when contributing. + +1. The interpreter should attempt to return as valid a result as possible, and should never error out or crash. If the parameters are nonsensical, returning a null is the preferred approach. Optionally, a warning or error may be emitted to stderr, and making warnings for likely improper use is planned for future development. The interpreter should ideally never crash. +1. When adding functionality, consider whether it could be included in an existing operator / opcode via an additional parameter or expansion of parameter types. Effective comparing, merging, and generating of code works best when the branching factor of opcode types is not too large. +1. Efficiency and multithreading are important. Please do not include something that will slow it down unless there is a significant value gain. In particular, the query engine has been highly optimized, and adding a single branch in some of the inner loops can reduce performance on some applications by significant amounts. +1. Look around and ask the community before undertaking a big effort. Perhaps someone else is already working on it, or perhaps there's a reason why something has not been done yet. +1. Cross-platform support is important. Any contribution should in theory compile on all major compilers for all major operating systems and all major hardware supported. It's not expected that every contributor has every environment handy, but automated testing will hopefully reveal gaps. +1. If submitting a bug, please try to find a minimally reproducing test first. +1. Follow the coding style conventions listed in this document. +1. Prior to using **any** new 3rd party library, the library must be reviewed for utility, license considerations, and security. A library that adds considerable functionality, requires only cross-platform standard C++11, needs only a couple of files to be included, has good documentation and readable code, and has a vibrant and active international community is highly likely to be accepted. An unknown library with one anonymous contributor, a restrictive license, poor documentation and code quality, difficult usage, and complex additions to the build process is highly likely to be rejected, no matter the potential functionality. Between these extremes is room for discussion. +1. There is no "my code", "your code", and "Bob's code". This means: + - No @author or other specific attribution in source code + - Anyone can work on any part of the system, as long as they have the + necessary knowledge and skill and it makes sense for them to do so + - Do say: + - "There's a bug in X that manifests when the Y method is called with Z." + - Don't say: + - "There's a bug in Bob's code where it interacts with the module + Alice contributed last week." + +# Coding Style + +1. Properly name your variables. We're not competing for "least amount + of code written to get it working". Long variable names are + acceptable when they aid understanding.
Use your best judgement. Be consistent and + sufficiently descriptive. + +1. Do not add unnecessary comments when the function of the code is + immediately obvious. + + int num_widgets = 0; //number of widgets + +1. Add comments to any code that would not be immediately obvious to + another developer who has never seen that block of code. + Complex algorithms should have outlines in comments as to how they work. + +1. Someone who has never seen your code should be able to read it, + understand it, and find functional areas quickly. Whitespace should + be used as "phrasing" (as in music) to group code into chunks that a + person can easily understand, with comments to make it easier to + navigate. If a person needs to read 10 lines of code to understand + what it does, it should have a one-line comment so the person + doesn't need to read it. Put yourself in the shoes of someone who + has never seen your code before but must quickly fix an important + bug correctly. + +1. Generally, comments should explain **why** and **how**, not **what**. + +1. Comments should usually start above the code and use //. On rare occasions, + comments can make sense on the same line, especially for large and complex if/else blocks. + +1. Be consistent with the rest of the code base when working on + existing projects. This usually means you should follow the + standards for whatever language you're coding. + +1. Sufficient whitespace should be used between numbers, operators, + variables, functions, etc. so that people with dyslexia and other reading concerns + can efficiently read the code. + + //bad + for(size_t i=0;i(mass) / volume; + + //don't do this: + //TODO: set appropriate error code. Or maybe just forget about this unless it becomes a bug. + int error_code = -1; + +1. Use tabs for indentation, one tab per level. Use spaces after the initial tab length. + +1. Preprocessor macros should be one indentation fewer than the code to which they apply. + +1. Curly braces go on their own lines except for do-while loops. + +1. CamelCase should be used for classes, functions, and methods, +lowerCamelCase for all attributes, and lower\_snake\_case for all +variables on the stack or global (parameters, local variables, etc.); these conventions are pulled together in the short sketch following this list. + +1. Header include order is the following, and within each header group, + headers should be sorted alphabetically. + 1. Project (local to current project) + 1. 3rd party (local to current project) + 1. System/compiler + 1. Forward declarations + +1. Explicit and specific types are preferable in declarations, but auto can be allowed. E.g., int32_t is preferable to int. + +1. Grammar and writing style should aspirationally follow the [Chicago Manual of Style](https://en.wikipedia.org/wiki/The_Chicago_Manual_of_Style).
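To tie the conventions above together, the following short C++ sketch is purely illustrative: the class, method, and variable names are invented for this example and are not taken from the Amalgam codebase. It shows tabs for indentation, braces on their own lines, comments above the code, CamelCase for the class and method, lowerCamelCase for attributes, lower\_snake\_case for parameters and locals, and explicit types such as int32_t.

	#include <cstdint>

	//tracks a running count and total weight of widgets; names here are hypothetical
	class WidgetTracker
	{
	public:
		//accumulates one widget so totals can be queried later without rescanning
		void AddWidgetWeight(double widget_weight)
		{
			//parameters and locals use lower_snake_case; explicit integer widths are preferred
			int32_t updated_count = widgetCount + 1;
			widgetCount = updated_count;
			totalWeight += widget_weight;
		}

	protected:
		//attributes use lowerCamelCase
		int32_t widgetCount = 0;
		double totalWeight = 0.0;
	};

The sources under src/Amalgam in this repository remain the authoritative reference for style; when an existing file and this sketch disagree, follow the existing code.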
diff --git a/LICENSE-3RD-PARTY.txt b/LICENSE-3RD-PARTY.txt new file mode 100644 index 00000000..40f3f267 --- /dev/null +++ b/LICENSE-3RD-PARTY.txt @@ -0,0 +1,400 @@ +3RD PARTY LICENSES + +* Howard Hinnant's Date library + * Source: https://github.com/HowardHinnant/date + * Location: ./src/third_party/date +* fast_log + * Source: https://github.com/nadavrot/fast_log + * Location: ./src/third_party/fast_log +* MurmurHash3 + * Source: https://github.com/aappleby/smhasher + * Location: ./src/third_party/murmurhash3 +* rapidyaml + * Source: https://github.com/biojppm/rapidyaml/releases + * Location: ./src/third_party/rapidyaml +* simdjson + * Source: https://simdjson.org/, https://github.com/simdjson/simdjson + * Location: ./src/third_party/simdjson +* Skarupke flat hash map and bytell hash map + * Source: + * https://github.com/skarupke/flat_hash_map/blob/master/flat_hash_map.hpp + * https://github.com/skarupke/flat_hash_map/blob/master/bytell_hash_map.hpp + * Description: + * https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/ + * https://probablydance.com/2018/05/28/a-new-fast-hash-table-in-response-to-googles-new-fast-hash-table/ + * Location: ./src/third_party/skarupke_maps +* Swift programming language's decimal conversion library (Apple) + * https://github.com/apple/swift/blob/master/include/swift/Runtime/SwiftDtoa.h + * https://github.com/apple/swift/blob/master/stdlib/public/runtime/SwiftDtoa.cpp + * Location: ./src/third_party/swiftdtoa +* TweetNaCl + * Source: + * https://tweetnacl.cr.yp.to + * https://tweetnacl.cr.yp.to/tweetnacl-20140917.pdf + * Location: ./src/third_party/tweetnacl + + +############################################################################# +LICENSE TEXTS +############################################################################# + + +------------------------------------ +Howard Hinnant's Date library +------------------------------------ + + +The MIT License (MIT) + +Copyright (c) 2015, 2016, 2017 Howard Hinnant +Copyright (c) 2016 Adrian Colomitchi +Copyright (c) 2017 Florian Dang +Copyright (c) 2017 Paul Thompson +Copyright (c) 2018, 2019 Tomasz Kamiński +Copyright (c) 2019 Jiangang Zhuang + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +Our apologies. When the previous paragraph was written, lowercase had not yet +been invented (that would involve another several millennia of evolution). +We did not mean to shout. 
+ + +------------------------------------ +fast_log +------------------------------------ + +MIT License + +Copyright (c) 2022 Nadav Rotem + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +------------------------------------ +MurmurHash3 +------------------------------------ + + +MurmurHash3 was written by Austin Appleby, and is placed in the public +domain. The author hereby disclaims copyright to this source code. + + +------------------------------------- +rapidyaml +------------------------------------- + +Copyright (c) 2018, Joao Paulo Magalhaes + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +------------------------------------- +simdjson +------------------------------------- + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2018-2019 The simdjson authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + +------------------------------------ +Skarupke flat hash map & bytell hash map +------------------------------------ + + + Copyright Malte Skarupke 2017. +Distributed under the Boost Software License, Version 1.0. + (See http://www.boost.org/LICENSE_1_0.txt) + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + +------------------------------------ +Swift programming language's decimal conversion library (Apple): +------------------------------------ + + +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2018, 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + + +------------------------------------ +TweetNaCl +------------------------------------ + + +Licensed as "public domain", as described in https://tweetnacl.cr.yp.to/tweetnacl-20140917.pdf diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..4a6005f5 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,196 @@ +Diveplane Corporation + +Free Software License Terms + +PLEASE READ THIS LICENSE AGREEMENT (“Agreement”) CAREFULLY. This Agreement is a +legally binding agreement between you (sometimes referred to as “You” or +“Your”) and Diveplane Corporation (referred to as “Diveplane”, “Us”, “Our”, or +“We”). You and Diveplane are collectively referred to herein as the “Parties”. +By accessing and using the Diveplane Software, You agree to be bound by these +terms and conditions. + + +1. Diveplane Software + + a. Provided as Is. You acknowledge and agree that the Diveplane Reactor + Community Edition (“Diveplane Software”) is provided “as is” with no + warranty express or implied under this Agreement, and for use by You. No + other services, upgrades, or support are being provided to You, but + Diveplane may, at its sole discretion provide upgrades during the Term. + + b. Grant of License for Your Use Only. 
Subject to the terms and conditions + of this Agreement, Diveplane hereby grants to You a limited, non- + exclusive, non-transferable, non-sublicensable right to install, access, + and use the Diveplane Software and its output (“Diveplane Output”) for + commercial or non-commercial purposes as limited by this Agreement. + + c. Prohibitions. You may not and shall not permit anyone to (a) copy the + Diveplane Software; (b) sublicense, rent, lease, sell, loan, transfer, + distribute, translate, reverse engineer, decompile, or disassemble or + otherwise obtain or attempt to create, derive, or obtain the source or + object code of the Diveplane Software; (c) modify, add content to, + enhance, prepare derivative works from or otherwise change the Diveplane + Software; (d) bypass or breach any security device or other protection + used by the Diveplane Software; (e) damage, destroy, disable, interfere + with or otherwise impede or harm the Diveplane Software, Diveplane + systems or Diveplane’s provision of services to or from any third party, + in whole or in part; (f) access or use the Diveplane Software or other + services for purposes of competitive analysis of or creation of + competitive software to the Diveplane Software. + + d. Remote Network Interaction. Notwithstanding any other provision of this + Agreement, this agreement does not permit You to offer services that use + the Diveplane Software over a network (“Network Services”) that allows + users interacting with programs that use the Diveplane Software remotely + through a computer network. + + e. Distribution. Notwithstanding any other provision of this Agreement, + this Agreement does not permit distribution or conveying the Diveplane + Software, including embedded in other software or hardware products, to + anyone else, nor can you provide the Diveplane Software for download, or + place it on a computer or server so that it may be downloaded by another + (“Redistributed Copies”). Redistributed Copies are not licensed under + this Agreement. + + f. Suspension. Diveplane may, and You shall assist Diveplane to, suspend, + terminate or otherwise deny Your and/or any User’s, or any other + person’s access to or use of all or any part of the Diveplane Software + without incurring any resulting obligation or liability, if: (i) + required by a judicial or other governmental demand; or (ii) Diveplane + believes, in its reasonable discretion, that You or any User has failed + to comply with any material term of this Agreement; or (iii) this + Agreement expires or is terminated. Diveplane may terminate this + Agreement upon the occurrence of (i) or (ii) in this Section. + + g. Ownership. As between Diveplane and You, Diveplane owns and shall own + all right, title, and interest in and to the Diveplane Software and all + products or services developed by Diveplane in relation thereto, during + and after the Term. + + h. Your Data and Usage Data. You have sole ownership and/or rights to all + Your data supplied by You to the Diveplane Software (“Your Data”). You + acknowledge, consent and agree to Diveplane’s collection, use, + processing and storage of usage related content from the computer or + other devices used to access the Diveplane Software. This may include, + but is not necessarily limited to, version number, IP addresses and + other information like internet service, location, the type of browser + and modules that are used and/or accessed (“Usage Data”). For avoidance + of doubt, Usage Data is not Your Data. + + i. 
Term and Effect of Termination or Expiration. The term of this Agreement + runs from the date You first download, access, or use the Diveplane + Software and runs until December 31, 2023 (“Term”) unless earlier + terminated. You may terminate this Agreement at will. Upon any + termination of this Agreement, all rights, licenses, consents and + authorizations granted by either party to the other hereunder will + immediately terminate and You shall immediately deactivate the + applicable access credentials, including those of Users, and delete, + uninstall, and / or remove all copies of the Diveplane Software and any + derivate works. + + j. Survival. Those provisions of this Agreement that by their nature are + intended to survive termination or expiration of this Agreement shall + so survive. + + +2. Disclaimers and No Warranties + + a. General. The Diveplane Software is provided “as is” and no warranties, + express or implied, are provided for the software. You acknowledge that: + (i) the Diveplane Software may contain errors, design flaws or other + problems; (ii) access to and/or use of the Diveplane Software may result + in unexpected results, loss of Your Data, project delays or other + unpredictable damage or loss, and that You should backup all relevant + systems and maintain copies of all Your Data used in the Diveplane + Software under this Agreement; and (iii) by entering into this Agreement, + Diveplane is under no obligation to enter into any subsequent agreement + with You related to the Diveplane Software. For the avoidance of doubt, + You agree and acknowledge that loss of data and records is a risk in + connection with this Agreement and, therefore, agree to configure and + backup Your computer system environment and data to prevent the + activities contemplated hereunder from causing business interruption, + damage to systems, loss of Your Data, or other loss or damage. EXCEPT AS + SET FORTH HEREIN, YOU AGREE THAT DIVEPLANE HAS NO OBLIGATION OR LIABILITY + FOR ANY LOSS, ALTERATION, DESTRUCTION, DAMAGE, CORRUPTION OR RECOVERY OF + YOUR DATA OR SYSTEMS. You represent, warrant, and covenant to Diveplane + that You own or otherwise have and will have the necessary rights and + consents in and relating to the Your Data so that, as received by the + Diveplane Software and processed in accordance with this Agreement, they + do not and will not infringe, misappropriate or otherwise violate any + intellectual property rights, privacy or other rights of any third party + or violate any applicable law. + + b. DIVEPLANE MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, + REGARDING USE OF OR RELIANCE ON THE DIVEPLANE SOFTWARE, AND THE DIVEPLANE + SOFTWARE IS PROVIDED “AS IS.” YOU HEREBY WAIVE, RELEASE AND DISCLAIM ALL + OTHER WARRANTIES, OBLIGATIONS AND LIABILITIES OF DIVEPLANE AND ALL OTHER + REMEDIES, RIGHTS AND CLAIMS OF YOURS EXPRESS OR IMPLIED, ARISING BY LAW + OR OTHERWISE, WITH RESPECT TO ANY DEFECT, DEFICIENCY, DATA ACCURACY, OR + NONCONFORMITY IN THE SOFTWARE OR ANYTHING ELSE FURNISHED UNDER THIS + AGREEMENT, INCLUDING BUT NOT LIMITED TO ANY: (A) IMPLIED WARRANTY OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE; (B) IMPLIED WARRANTY + ARISING FROM COURSE OF PERFORMANCE, COURSE OF DEALING, OR USAGE OF TRADE; + (C) ANY OBLIGATION, LIABILITY, RIGHT, REMEDY OR CLAIM IN TORT, + NOTWITHSTANDING ANY FAULT, NEGLIGENCE, STRICT LIABILITY OR PRODUCT + LIABILITY OF DIVEPLANE (WHETHER ACTIVE, PASSIVE OR IMPUTED); AND (D) + CLAIM OF INFRINGEMENT. 
DIVEPLANE DOES NOT WARRANT THAT OPERATION OF OR + USE OF THE DIVEPLANE SOFTWARE WILL BE UNINTERRUPTED, ERROR-FREE, OR + ENTIRELY SECURE. YOU WILL BEAR ALL RISK ASSOCIATED WITH ANY USE OF THE + INTERNET OR OTHER MEANS OF COMMUNICATION OR DATA TRANSMISSION BY OR ON + BEHALF OF YOU UNDER THIS AGREEMENT, AND DIVEPLANE DISCLAIMS ALL LIABILITY + AND RESPONSIBILITY IN CONNECTION WITH SUCH USE. Further, You agree to + indemnify and hold Diveplane harmless from and against any third party + damages, claims or other liabilities directly or indirectly caused or + arising out of any of the following: (i) Your use or non-use of the + Diveplane Software; (ii) any content, results or output from any + Diveplane Software; (iii) any recommendations based on the content, + results or output from any Diveplane Software; (iv) Your failure to + identify and correct any inaccuracies and/or errors in the content, + results or output of any Diveplane Software; and/or (v) unauthorized or + illegal use of Your Data in the Diveplane Software. + + c. IN NO EVENT SHALL DIVEPLANE BE LIABLE FOR ANY INDIRECT, SPECIAL, + INCIDENTAL, CONSEQUENTIAL OR PUNITIVE DAMAGES, LOSS OF USE OR DATA, + INTERRUPTION OF BUSINESS, OR ANY OTHER DAMAGES, WHETHER SUCH ALLEGED + DAMAGES ARE LABELED IN TORT, CONTRACT OR INDEMNITY, INCLUDING, BUT NOT + LIMITED TO, LOSS OF REVENUE AND LOSS OF PROFITS, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGES. + + +3. Other Provisions + + a. Publicity. If You use the Diveplane Software or its output (“Diveplane + Software Output”) commercially or in or as a basis of a publication (such + as a book, paper, blog post, direct publishing of the Diveplane Software + Output, etc.) in any medium, You shall and agree to conspicuously publish + an appropriate notice that indicates Your use of the Diveplane Software + and / or the Diveplane Software Output. + + b. Independent Parties. The parties agree that this Agreement governs the + relationship between Diveplane and You for the license of Reactor + Community Edition. Nothing contained in this Agreement shall be construed + as creating a joint venture, partnership, agent, or employment + relationship between Diveplane and You or any User. + + c. Severability. If any term or provision of this Agreement should be + declared invalid by a court of competent jurisdiction or by operation of + law, the remaining terms and provisions of this Agreement shall be + unimpaired, and the invalid term or provision shall be replaced by such + valid term or provision as comes closest to its underlying intention. + + d. No Assignment. You cannot assign, delegate, or otherwise transfer in any + way either this Agreement or any of your rights, duties or + obligations hereunder. + + e. Applicable Law. This Agreement will be interpreted, construed, and + enforced in all respects in accordance with the laws of the State of + Delaware, including its statutes of limitation, but without reference to + its choice of law principles. Each party hereby consents to the + jurisdiction and venue of the state and federal courts located in North + Carolina, with regard to any suit or claim arising under or by reason of + this Agreement. + + f. Compliance with Laws. You shall comply with all applicable laws, + regulations, rules, orders, and other requirements, now or hereafter in + effect, of any applicable governmental authority, in its performance of + activities hereunder. 
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..b878b524
--- /dev/null
+++ b/README.md
@@ -0,0 +1,268 @@
+# Amalgam™
+
+**Table of Contents**
+
+1. [Introduction](#introduction)
+1. [Programming in Amalgam](#programming-in-amalgam)
+    * [IDE Syntax Highlighting](#ide-syntax-highlighting)
+    * [IDE Debugging](#ide-debugging)
+1. [Amalgam Interpreter](#amalgam-interpreter)
+    * [Recommended System Specs](#recommended-system-specs)
+    * [Pre-built Binaries](#pre-built-binaries)
+    * [Dev/local Builds](#devlocal-builds)
+    * [Usage](#usage)
+1. [Contributing](#contributing)
+
+## Introduction
+
+Amalgam™ is a domain specific language ([DSL](https://en.wikipedia.org/wiki/Domain-specific_language)) developed primarily for [genetic programming](https://en.wikipedia.org/wiki/Genetic_programming) and [instance based machine learning](https://en.wikipedia.org/wiki/Instance-based_learning), but also for simulation, agent based modeling, data storage and retrieval, the mathematics of probability theory and information theory, and game content and AI. The language format is somewhat LISP-like in that it uses parenthesized list format with prefix notation and is geared toward functional programming, where there is a one-to-one mapping between the code and the corresponding parse tree.
+
+Whereas virtually all practical programming languages are primarily designed for some combination of programmer productivity and computational performance, Amalgam prioritizes code matching and merging, as well as a deep equivalence of code and data. Amalgam uses _entities_ to store code and data, with a rich query system to find entities by their _labels_. The language uses a variable stack, but all attributes and methods are stored directly as labels in entities. There is no separate class versus instance, but entities can be used as prototypes to be copied and modified. Though code and data are represented as trees from the root of each entity, graphs in code and data structures are permitted and are flattened to code using special references. Further, instead of failing early when there is an error, Amalgam supports genetic programming and code mixing by being extremely weakly typed, and attempts to find a way to execute code no matter whether types match or not.
+
+Amalgam takes inspiration from many programming languages, but those with the largest influence are LISP, Scheme, Haskell, Perl, Smalltalk, and Python. Despite being much like LISP, there is deliberately no macro system. This is to make sure that code is semantically similar whenever the code is similar, regardless of context. It makes it easy to find the difference between x and y as an executable patch, and then apply that patch to z as `(call (difference x y) (assoc _ z))`, or semantically mix blocks of code a and b as `(mix a b)`. Amalgam is not a purely functional language. It has imperative and object oriented capabilities, but is primarily optimized for functional programming with relatively few opcodes whose behavior adapts based on their parameters, maximizing flexibility for code mixing and matching.
+
+As Amalgam was designed with genetic programming in mind, there is always a chance that an evolved program ends up consuming more CPU or memory resources than desired, or attempts to affect the system outside of the interpreter. For these reasons, there are many strict sandboxing aspects of the language with optional constraints on access, CPU, and memory. The interpreter also has a permissions system, with only the root entity having access to system commands (though it can give other entities root permissions), and entities cannot have any effect on containing entities unless the container offers an executable label to contained entities.
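As a small, hedged illustration of that isolation (the opcode behavior follows the language reference in `docs/language.js` further down in this diff; the file name and the `amalgam-st` invocation are assumptions based on the usage notes later in this README):

```bash
# Sketch: run code in a sandbox where only the passed-in assoc is visible.
# Per the language reference, call_sandboxed cannot see the caller's variables,
# and opcodes that require permissions (e.g. system) evaluate to null inside it.
cat > sandbox_demo.amlg <<'EOF'
;x is declared by the caller but is not visible inside call_sandboxed; this prints 7
(let (assoc x 10)
  (print (call_sandboxed (lambda (+ y 4)) (assoc y 3)))
)
EOF
./amalgam-st sandbox_demo.amlg
```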
+
+The Amalgam interpreter was designed to be used as a standalone interpreter and to build functionality for other programming languages and environments. It does not currently have rich support for linking C libraries into the language, but that is planned for future functionality.
+
+Initial development on Amalgam began in 2011. It was first offered as a commercial product in 2014 at Hazardous Software Inc. and was open sourced in September 2023 by Howso Incorporated (formerly known as Diveplane Corporation, a company spun out of Hazardous Software Inc.).
+
+When referencing the language: 'Amalgam', 'amalgam', 'amalgam-lang', and 'amalgam language' are used interchangeably with **Amalgam** being preferred. When referencing the interpreter: 'Amalgam interpreter', 'interpreter', 'Amalgam app', and 'Amalgam lib' are used interchangeably.
+
+## Programming in Amalgam
+
+See the [Amalgam beginner's guide](AMALGAM-BEGINNER-GUIDE.md) to get started.
+
+Full Amalgam language usage documentation is located in the [Amalgam Language Reference](https://howsoai.github.io/amalgam).
+
+Further examples can be found in the [examples](examples/README.md) directory.
+
+The primary file extensions consist of:
+* `.amlg` - Amalgam script
+* `.mdam` - Amalgam metadata, primarily just current random seed
+* `.caml` - compressed Amalgam for fast storage and loading, which may contain many entities.
+
+### IDE Syntax Highlighting
+
+Syntax highlighting is provided as a plugin for two editors:
+
+* [VSCode Plugin](https://github.com/howsoai/amalgam-ide-support-vscode)
+* [Notepad++ Plugin](https://github.com/howsoai/amalgam-ide-support-npp)
+
+Since debugging Amalgam code is only supported in VSCode, it is recommended that VSCode be the main IDE for writing and debugging Amalgam code.
+
+### IDE Debugging
+
+Debugging Amalgam is supported through the [VSCode Plugin](https://github.com/howsoai/amalgam-ide-support-vscode).
+
+## Amalgam Interpreter
+
+The Amalgam interpreter is written in C++ and uses modern C++ standards to create a fast, cross-platform experience when running Amalgam code.
+
+### Recommended System Specs
+
+At least 8 physical cores and 16GB of RAM.
+
+Although the interpreter itself can run on very few system resources, the above recommendation is based on typical workloads, which are compute and memory intensive.
+
+### Pre-built Binaries
+
+Pre-built binaries are provided for specific target systems. They are as statically linked as possible without overly complicating the build.
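For instance, consuming one of these pre-built releases on Linux amd64 might look like the following (a hedged sketch, not an official install procedure; the tarball name follows the packaging pattern in `build/cmake/create_package.cmake`, and the version string must be substituted):

```bash
# Hypothetical: unpack a downloaded release tarball and verify the default binary.
VERSION="x.y.z"   # substitute the actual release version
mkdir -p amalgam
tar -xzf "amalgam-${VERSION}-linux-amd64.tar.gz" -C ./amalgam
./amalgam/bin/amalgam --version   # apps are packaged under bin/, libraries under lib/
```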
+
+#### Build Matrix
+
+An interpreter application and shared library (dll/so/dylib) are built for each release. A versioned tarball is created for each target platform in the build matrix:
+
+| Platform | Variants 1,2 | Automated Testing | Notes |
+|------------------------------|-------------------------------|:------------------:|-------|
+| Windows amd64 | MT, ST, OMP, MT-NoAVX | :heavy_check_mark: | |
+| Linux amd64 | MT, ST, OMP, MT-NoAVX, ST-PGC | :heavy_check_mark: | ST-PGC is for testing only, not packaged for release |
+| Linux arm64: 8.2-a+simd+rcpc | MT, ST, OMP | :heavy_check_mark: | Tested with [qemu](https://www.qemu.org/) |
+| Linux arm64: 8-a+simd | ST | :heavy_check_mark: | Tested with [qemu](https://www.qemu.org/) |
+| macOS amd64 | MT, ST, OMP, MT-NoAVX | :heavy_check_mark: | Only **MT-NoAVX** tested currently |
+| macOS arm64: 8.4-a+simd | MT, ST, OMP | :x: | Manually tested, M1 and newer supported |
+| WASM 64-bit | ST | :x: | Built on linux using emscripten. Planned testing: headless test with node + jest |
+
+* 1 Variant meanings:
+    * MT
+        * Multi-threaded
+        * Binary postfix: '-mt'
+        * Interpreter uses threads for parallel operations to increase throughput at the cost of some overhead
+    * ST
+        * Single-threaded
+        * Binary postfix: '-st'
+        * Interpreter does not use multiple threads for parallel operations
+    * OMP
+        * [OpenMP](https://en.wikipedia.org/wiki/OpenMP)
+        * Binary postfix: '-omp'
+        * Interpreter uses OpenMP threading internally to minimize latency of query operations
+    * MT-NoAVX
+        * Multi-threaded but without AVX intrinsics
+        * Binary postfix: '-mt-noavx'
+        * Interpreter uses threads for parallel operations to increase throughput at the cost of some overhead but without AVX intrinsics
+        * Useful in emulators, virtualized environments, and older hardware that doesn't have AVX support
+        * amd64 only, as AVX intrinsics are only applicable to variants of this architecture
+    * ST-PGC
+        * Single-threaded with pedantic garbage collection (PGC)
+        * Binary postfix: '-st-pgc'
+        * Interpreter does not use any threads and performs garbage collection at every operation
+        * Very slow by nature, intended only to be used for verification during testing or debugging
+* 2 Most platforms create a bare binary with no postfix
+    * These are just clones of their respective MT binary and can be used for better symlinking, cleaner library linking, and simpler CLI usage.
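As a rough sketch of how the variants above might be chosen at runtime on Linux amd64 (a hypothetical helper, not part of the repository; the binary names follow the postfix list above):

```bash
# Hypothetical variant picker: prefer the AVX2-enabled multi-threaded binary and
# fall back to the no-AVX build on hardware without AVX2 support.
if grep -qw avx2 /proc/cpuinfo; then
    BIN=./amalgam-mt         # multi-threaded, built with AVX2 intrinsics
else
    BIN=./amalgam-mt-noavx   # multi-threaded, no AVX intrinsics
fi
"$BIN" --version
```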
+
+#### Build Tools
+
+Pre-built binaries use CMake+Ninja for CI/CD. See [PR workflow](.github/workflows/create-pr-build.yml) for automated build steps.
+
+Though Amalgam is intended to support any C++17 compliant compiler, the current specific tool and OS versions used are:
+
+* CMake 3.23
+* Ninja 1.11
+* Windows:
+    * Visual Studio 2022 v143
+* Linux:
+    * Ubuntu 20.04, gcc-10
+* macOS (Darwin):
+    * macOS 11, AppleClang 13.0
+* WASM:
+    * Ubuntu 20.04, emscripten 3.1.32
+
+#### Runtime Requirements
+
+Running the pre-built interpreter has specific runtime requirements per platform:
+
+##### All
+
+* 64-bit OS
+
+##### amd64
+
+* [AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#Advanced_Vector_Extensions_2) intrinsics
+    * A "no-avx" variant of the multi-threading binary is built on amd64 platforms for environments without AVX2 intrinsics.
+    * See [Dev/local Builds](#devlocal-builds) for compiling with other intrinsics (e.g., AVX512)
+
+##### Windows
+
+* OS:
+    * Microsoft Windows 10 or later
+    * Microsoft Windows Server 2016 or later
+* Arch: amd64
+
+##### Linux
+
+* glibc 2.29 or later
+* Arch: amd64 or arm64
+    * Specific arm64 builds: `armv8-a+simd` & `armv8.2-a+simd+rcpc`
+
+##### macOS (Darwin)
+
+* macOS 11 or higher
+* Arch: amd64 or arm64
+    * Specific arm64 builds: `armv8.4-a+simd` (M1 or later)
+
+##### WASM
+
+WASM support is still experimental.
+
+* No specific runtime requirements at this time
+
+### Dev/local Builds
+
+Dev and local builds can be run using either the CLI or an IDE.
+
+#### CLI
+
+The workflow for building the interpreter on all platforms is fairly straightforward. [CMake Presets](https://cmake.org/cmake/help/latest/manual/cmake-presets.7.html) are used to define all settings in the [CMake presets file](CMakePresets.json).
+
+Example for release build on linux amd64:
+
+```bash
+PRESET="amd64-release-linux"
+cmake --preset $PRESET  # configure/generate (./out/build)
+cmake --build --preset $PRESET  # build
+cmake --build --preset $PRESET --target test  # test
+cmake --build --preset $PRESET --target install  # install (./out/install)
+cmake --build --preset $PRESET --target package  # package (./out/package)
+```
+
+The above performs a local "build install". For specifying a custom location, run install with an install prefix. Depending on permissions, admin access (admin elevated prompt on Windows, sudo/su on linux/macos) might be needed:
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX="/path/to/install/location" --build --preset $PRESET --target install
+```
+
+Depending on the platform, not all tests will run successfully out of the box, especially when cross compiling. For those cases (e.g., arm64 on Mac M1 or AVX2/AVX512), the tests that are runnable on the specific platform can be included/excluded by running CTest directly (not through CMake, like above):
+
+```bash
+ctest --preset $PRESET --label-exclude 'advanced_intrinsics'
+```
+
+To see all available test labels:
+
+```bash
+ctest --preset $PRESET --print-labels
+```
+
+All CTest run options can be found on the [CMake website](https://cmake.org/cmake/help/latest/manual/ctest.1.html#run-tests).
+
+#### IDE
+
+Automation uses the CMake generated build system (ninja), but Visual Studio or VSCode are the best options for local development. In general, VSCode is recommended as it provides the most uniform developer experience across platforms.
+
+For the best C++ developer experience, Visual Studio on Windows is the ideal development environment (no paid features needed, VS Community edition works). A helper script [open-in-vs.bat](open-in-vs.bat) is provided to set up the CLI with VS build tools and open the IDE. It can be used to open multiple variants of the Windows build:
+
+1. Default (no args) : Visual Studio solution (CMake generated, "amd64-windows-vs" preset)
+    * `open-in-vs.bat`
+1. vs_cmake : Visual Studio directory (load from directory with CMake file)
+    * `open-in-vs.bat vs_cmake`
+1. vscode : VSCode directory (load from directory with CMake file)
+    * `open-in-vs.bat vscode`
+1. vs_static : Visual Studio solution (local static non-CMake generated: [Amalgam.sln](Amalgam.sln))
+    * `open-in-vs.bat vs_static`
+
+Note: on Windows, some issues have been found with using the CMake generated VS solutions and the native CMake support in Visual Studio and VSCode. If the developer experience is unstable, it is recommended that the `vs_static` build be used instead of the CMake generated build. It is planned to (eventually) deprecate this static VS solution when CMake support in VS becomes more stable.
+
+#### Build Customizations
+
+Some specific build customizations are important to note. These customizations can be altered in the main [CMake file](CMakeLists.txt#L1):
+
+* [Compiler options](build/cmake/global_compiler_flags.cmake)
+* [arm64 arch](build/cmake/global_compiler_flags.cmake#L90)
+* [amd64 AVX intrinsics](build/cmake/global_compiler_flags.cmake#L126)
+* [Custom testing](build/cmake/create_tests.cmake)
+
+### Debugging
+
+For debugging the C++ code, example launch files are provided in [launch.json](docs/launch.example.json) (VSCode) and [launch.vs.json](docs/launch.vs.example.json) (Visual Studio) when opening a folder as a CMake project.
+
+Remote debugging for linux is supported in both IDEs.
+
+### Usage
+
+Given an Amalgam interpreter, usage is similar to other popular interpreters.
+
+Basic usage description and CLI options can be retrieved by running the binary without any parameters:
+
+```bash
+./amalgam-mt
+```
+
+Run an Amalgam script:
+
+```bash
+./amalgam-mt test.amlg
+```
+
+If the Amalgam file begins with a shebang (#!) followed by the path to the executable binary, which, coincidentally, is the syntax for a private label, the script can be run as:
+
+```bash
+./test.amlg
+```
+
+## License
+
+[License](LICENSE.txt)
+
+## Contributing
+
+[Contributing](CONTRIBUTING.md)
diff --git a/build/cmake/configure_files/AmalgamVersion.h.in b/build/cmake/configure_files/AmalgamVersion.h.in
new file mode 100644
index 00000000..f22f7723
--- /dev/null
+++ b/build/cmake/configure_files/AmalgamVersion.h.in
@@ -0,0 +1,23 @@
+#pragma once
+
+//this is the version number of Amalgam
+#define AMALGAM_VERSION_MAJOR @AMALGAM_VERSION_MAJOR@
+#define AMALGAM_VERSION_MINOR @AMALGAM_VERSION_MINOR@
+#define AMALGAM_VERSION_PATCH @AMALGAM_VERSION_PATCH@
+#define AMALGAM_VERSION_SUFFIX "@AMALGAM_VERSION_SUFFIX@"
+
+//helper macro functions to convert the version numbers above to a string
+//seems unusual, but an extra layer of indirection is needed so the macro arguments are expanded before being converted to strings
+//this is standard C preprocessor behavior and its necessity has been tested when used multiple times in a single file
+#define VERSION_NUMBER_TO_STRING(x) #x
+#define VERSION_NUMBER_STRING(x) VERSION_NUMBER_TO_STRING(x)
+
+//macro that can be used to get a string of the current version
+#define AMALGAM_VERSION_STRING \
+	VERSION_NUMBER_STRING(AMALGAM_VERSION_MAJOR) "." \
+	VERSION_NUMBER_STRING(AMALGAM_VERSION_MINOR) "." \
+	VERSION_NUMBER_STRING(AMALGAM_VERSION_PATCH) \
+	AMALGAM_VERSION_SUFFIX
+
+//data that can be accessed via system command; any length is allowed
+#define AMALGAM_BUILT_IN_DATA { 0x00, 0x00, 0x00, 0x00 }
diff --git a/build/cmake/create_no_suffix_artifacts.cmake b/build/cmake/create_no_suffix_artifacts.cmake
new file mode 100644
index 00000000..ec83b1ca
--- /dev/null
+++ b/build/cmake/create_no_suffix_artifacts.cmake
@@ -0,0 +1,46 @@
+#
+# No suffix default targets
+#
+
+# Don't create default targets for WASM
+if(NOT IS_WASM)
+	# No suffix app target:
+	set(NO_SUFFIX_APP_TARGET "${PROJECT_NAME}-mt-app")
+	if(IS_ARM64_8A)
+		# No mt target on arm64 8-a so default is based off st.
+ set(NO_SUFFIX_APP_TARGET "${PROJECT_NAME}-st-app") + endif() + set(NO_SUFFIX_APP_NAME "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}${CMAKE_EXECUTABLE_SUFFIX}") + add_custom_target("create-no-suffix-app" ALL + COMMENT "Creating default named app" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${NO_SUFFIX_APP_NAME}" + BYPRODUCTS ${NO_SUFFIX_APP_NAME} + ) + set_target_properties("create-no-suffix-app" PROPERTIES FOLDER "OtherBuildTargets") + install(FILES ${NO_SUFFIX_APP_NAME} DESTINATION bin PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS}) + + # No suffix sharedlib target: + set(NO_SUFFIX_LIB_TARGET "${PROJECT_NAME}-mt-sharedlib") + if(IS_ARM64_8A) + # No mt target on arm64 8-a so default is based off st. + set(NO_SUFFIX_LIB_TARGET "${PROJECT_NAME}-st-sharedlib") + endif() + set(NO_SUFFIX_LIB_NAME "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}${PROJECT_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}") + add_custom_target("create-no-suffix-sharedlib" ALL + COMMENT "Creating default named lib" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${NO_SUFFIX_LIB_NAME}" + BYPRODUCTS ${NO_SUFFIX_LIB_NAME} + ) + set_target_properties("create-no-suffix-sharedlib" PROPERTIES FOLDER "OtherBuildTargets") + install(FILES ${NO_SUFFIX_LIB_NAME} DESTINATION lib PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS}) + if(IS_WINDOWS) + set(NO_SUFFIX_LINKER_LIB_NAME "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}amalgam${CMAKE_STATIC_LIBRARY_SUFFIX}") + add_custom_target("create-no-suffix-linkinglib" ALL + COMMENT "Creating default named linking lib" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "$" "${NO_SUFFIX_LINKER_LIB_NAME}" + BYPRODUCTS ${NO_SUFFIX_LINKER_LIB_NAME} + ) + set_target_properties("create-no-suffix-linkinglib" PROPERTIES FOLDER "OtherBuildTargets") + install(FILES ${NO_SUFFIX_LINKER_LIB_NAME} DESTINATION lib PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS}) + endif() +endif() diff --git a/build/cmake/create_package.cmake b/build/cmake/create_package.cmake new file mode 100644 index 00000000..ea09009c --- /dev/null +++ b/build/cmake/create_package.cmake @@ -0,0 +1,18 @@ +# +# Package target +# + +set(CPACK_GENERATOR "TGZ") +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") +set(CPACK_PACKAGE_VERSION "${AMALGAM_VERSION_FULL}") +if(NOT IS_WASM) + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${OSv2}-${ARCH}") +else() + set(CPACK_PACKAGE_FILE_NAME "${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION}-${ARCH}") +endif() +set(CPACK_OUTPUT_FILE_PREFIX "${CMAKE_INSTALL_PREFIX}/../../package") +set(CPACK_INCLUDE_TOPLEVEL_DIRECTORY False) +set(CPACK_STRIP_FILES True) +set(CPACK_THREADS 0) +set(CPACK_ARCHIVE_THREADS 0) +include(CPack) diff --git a/build/cmake/create_static_targets.cmake b/build/cmake/create_static_targets.cmake new file mode 100644 index 00000000..b78a6415 --- /dev/null +++ b/build/cmake/create_static_targets.cmake @@ -0,0 +1,33 @@ +# +# Custom read-only targets with other files from source tree for convenience +# + +# Build files: +file(GLOB_RECURSE CONFIG_FILES "build/*") +list(APPEND CONFIG_FILES + .gitignore + CMakeLists.txt + CMakePresets.json + open-in-vs.bat +) +add_custom_target("${PROJECT_NAME}-build" SOURCES ${CONFIG_FILES}) +set_target_properties("${PROJECT_NAME}-build" PROPERTIES FOLDER "Utilities" EXCLUDE_FROM_ALL True) + +# Scripts: +file(GLOB_RECURSE SCRIPT_FILES "Amalgam/amlg_code/*") +add_custom_target("${PROJECT_NAME}-scripts" SOURCES ${SCRIPT_FILES}) +set_target_properties("${PROJECT_NAME}-scripts" PROPERTIES FOLDER "Utilities" EXCLUDE_FROM_ALL True) 
+ +# Docs: +file(GLOB_RECURSE DOC_FILES "docs/*") +list(APPEND DOC_FILES README.md LICENSE-3RD-PARTY.txt) +add_custom_target("${PROJECT_NAME}-docs" SOURCES ${DOC_FILES}) +set_target_properties("${PROJECT_NAME}-docs" PROPERTIES FOLDER "Utilities" EXCLUDE_FROM_ALL True) + +# Examples: +file(GLOB_RECURSE EXAMPLE_FILES "examples/*") +add_custom_target("${PROJECT_NAME}-examples" SOURCES ${EXAMPLE_FILES}) +set_target_properties("${PROJECT_NAME}-examples" PROPERTIES FOLDER "Utilities" EXCLUDE_FROM_ALL True) + +# Set source group for all static projects: +source_group(TREE ${CMAKE_SOURCE_DIR} FILES ${CONFIG_FILES} ${SCRIPT_FILES} ${DOC_FILES} ${EXAMPLE_FILES}) diff --git a/build/cmake/create_tests.cmake b/build/cmake/create_tests.cmake new file mode 100644 index 00000000..9f24feb1 --- /dev/null +++ b/build/cmake/create_tests.cmake @@ -0,0 +1,93 @@ +# +# Tests +# + +# CTest args: +set(CMAKE_CTEST_ARGUMENTS "-j" "--schedule-random" "--output-on-failure" "--output-log" "${PROJECT_SOURCE_DIR}/out/test/all_tests.log") + +# Not all tests can be run on all platforms: +if(IS_MACOS) + if(IS_ARM64) + # Can't run cross compiled arm64 binaries on macos amd64 hosts + list(PREPEND CMAKE_CTEST_ARGUMENTS "-LE" ".*") + else() + # Can't run advanced intrinsic builds on macos amd64 build machines + list(PREPEND CMAKE_CTEST_ARGUMENTS "-LE" "advanced_intrinsics") + endif() +elseif(IS_WASM) + # No tests to run for WASM + list(PREPEND CMAKE_CTEST_ARGUMENTS "-LE" ".*") +else() + list(PREPEND CMAKE_CTEST_ARGUMENTS "-L" "smoke_test") +endif() + +# Test runner for platforms that need it: +set(TEST_RUNNER) +if(IS_LINUX AND IS_ARM64) + set(TEST_RUNNER "qemu-aarch64" "-L" "${ARM64_LIB_DIR}") +endif() + +# Create tests for every app target: +set(ALL_TEST_TARGETS) +set(TEST_OUTPUT_LOG_BASE "${CMAKE_INSTALL_PREFIX}/../../test") +foreach(TEST_TARGET ${ALL_APP_TARGETS}) + + # No args test: + set(TEST_NAME "App.NoArgs.${TEST_TARGET}_noargs") + set(TEST_OUTPUT_LOG "${TEST_OUTPUT_LOG_BASE}/out.${TEST_NAME}.txt") + add_test(NAME ${TEST_NAME} + COMMAND ${TEST_RUNNER} "$" + ) + list(APPEND ALL_TEST_TARGETS ${TEST_NAME}) + + # Version test: + set(TEST_NAME "App.Version.${TEST_TARGET}_getversion") + set(TEST_OUTPUT_LOG "${TEST_OUTPUT_LOG_BASE}/out.${TEST_NAME}.txt") + add_test(NAME ${TEST_NAME} + COMMAND ${TEST_RUNNER} "$" --version + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} + ) + set_tests_properties(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION "${AMALGAM_VERSION_FULL_ESCAPED}") + list(APPEND ALL_TEST_TARGETS ${TEST_NAME}) + + # Full test: + set(TEST_NAME "App.FullTest.${TEST_TARGET}_fulltests") + set(TEST_OUTPUT_LOG "${TEST_OUTPUT_LOG_BASE}/out.${TEST_NAME}.txt") + add_test( + NAME ${TEST_NAME} + COMMAND ${TEST_RUNNER} "$" -l ${TEST_OUTPUT_LOG} amlg_code/full_test.amlg + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src/Amalgam + ) + set_tests_properties(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION "--total execution time--") + list(APPEND ALL_TEST_TARGETS ${TEST_NAME}) + +endforeach() + +# Create tests for every lib target: +foreach(TEST_TARGET ${ALL_SHAREDLIB_TARGETS}) + + # Create test exe: + set(TEST_EXE_NAME "${TEST_TARGET}-tester") + add_executable(${TEST_EXE_NAME} "test/lib_smoke_test/main.cpp" "test/lib_smoke_test/test.amlg") + set_target_properties(${TEST_EXE_NAME} PROPERTIES FOLDER "Testing") + target_link_libraries(${TEST_EXE_NAME} ${TEST_TARGET}) + + # Test for test exe: + set(TEST_NAME "Lib.SmokeTest.${TEST_EXE_NAME}") + add_test(NAME ${TEST_NAME} + COMMAND ${TEST_RUNNER} "$" test.amlg + WORKING_DIRECTORY 
${PROJECT_SOURCE_DIR}/test/lib_smoke_test + ) + set_tests_properties(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION "${AMALGAM_VERSION_FULL_ESCAPED}") + list(APPEND ALL_TEST_TARGETS ${TEST_NAME}) + +endforeach() + +# Add common test labels: +foreach(TEST_TARGET ${ALL_TEST_TARGETS}) + set(TEST_LABELS smoke_test) + if(IS_AMD64 AND NOT "${TEST_TARGET}" MATCHES "${NO_ADVANCED_INTRINSICS_AMD64_SUFFIX}") + list(APPEND TEST_LABELS "advanced_intrinsics") + endif() + set_tests_properties(${TEST_TARGET} PROPERTIES LABELS "${TEST_LABELS}") +endforeach() diff --git a/build/cmake/custom_add_target.cmake b/build/cmake/custom_add_target.cmake new file mode 100644 index 00000000..24455213 --- /dev/null +++ b/build/cmake/custom_add_target.cmake @@ -0,0 +1,274 @@ +# +# Function for creating custom compiled targets +# +# Notes: +# 1) PGC = Pedantic Garbage Collection +# 2) NO_INSTALL implies don't package, when given the target will be tested but not installed/packaged +# + +set(ALL_OBJLIB_TARGETS) +set(ALL_SHAREDLIB_TARGETS) +set(ALL_APP_TARGETS) +function(add_compiled_target) + set(options AUTO_NAME USE_THREADS USE_OPENMP USE_PGC USE_ADVANCED_ARCH_INTRINSICS NO_INSTALL) + set(oneValueArgs NAME TYPE OUTPUT_NAME_BASE IDE_FOLDER) + set(multiValueArgs INCLUDE_DIRS COMPILER_DEFINES LINK_LIBRARIES SOURCE APP_ONLY_SOURCE) + cmake_parse_arguments(args "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + # Error if unknown args passed in: + if(NOT "${args_UNPARSED_ARGUMENTS}" STREQUAL "") + message(FATAL_ERROR "Unparsed args: ${args_UNPARSED_ARGUMENTS}") + elseif(NOT "${args_KEYWORDS_MISSING_VALUES}" STREQUAL "") + message(FATAL_ERROR "Args missing values: ${args_KEYWORDS_MISSING_VALUES}") + endif() + + # Set local scope optional args: + set(AUTO_NAME ${args_AUTO_NAME}) + set(USE_THREADS ${args_USE_THREADS}) + set(USE_OPENMP ${args_USE_OPENMP}) + set(USE_PGC ${args_USE_PGC}) + set(USE_ADVANCED_ARCH_INTRINSICS ${args_USE_ADVANCED_ARCH_INTRINSICS}) + set(NO_INSTALL ${args_NO_INSTALL}) + + # For armv8-a targets, we do not build any threading binaries: + if(IS_ARM64_8A AND (USE_THREADS OR USE_OPENMP)) + return() + endif() + + # Validate naming combination: + if(AUTO_NAME AND NOT "${args_NAME}" STREQUAL "") + message(FATAL_ERROR "AUTO_NAME and NAME cannot both be set") + endif() + + # Validate target type: + set(TARGET_TYPES app sharedlib objlib) + if("${args_TYPE}" STREQUAL "") + message(FATAL_ERROR "Must supply target type: TYPE {${TARGET_TYPES}}") + elseif(NOT "${args_TYPE}" IN_LIST TARGET_TYPES) + message(FATAL_ERROR "Unknown target type: ${args_TYPE}") + endif() + set(TARGET_TYPE "${args_TYPE}") + + # Set vars based on target type: + set(IS_OBJLIB False) + set(IS_SHAREDLIB False) + set(IS_APP False) + if("${TARGET_TYPE}" STREQUAL "objlib") + set(IS_OBJLIB True) + elseif("${TARGET_TYPE}" STREQUAL "sharedlib") + set(IS_SHAREDLIB True) + elseif("${TARGET_TYPE}" STREQUAL "app") + set(IS_APP True) + endif() + + # Validate threads vs openmp: + if(USE_THREADS AND USE_OPENMP) + message(FATAL_ERROR "Threads + OpenMP builds are not supported") + endif() + + # Validate PGC vs threads/openmp: + if(USE_PGC AND (USE_THREADS OR USE_OPENMP)) + message(FATAL_ERROR "Pedantic garbage collection (PGC) + Threads/OpenMP builds are not supported") + endif() + + # Construct target name: + # Note: autonames target if asked to or accepts a hardcoded name from caller + set(TARGET_NAME_BASE) + set(TARGET_NAME) + if(NOT "${args_NAME}" STREQUAL "") + set(TARGET_NAME_BASE "${args_NAME}") + set(TARGET_NAME "${TARGET_NAME_BASE}") + 
elseif(AUTO_NAME) + set(TARGET_NAME_BASE "${PROJECT_NAME}") + if(USE_THREADS) + string(APPEND TARGET_NAME_BASE "-mt") + elseif(USE_OPENMP) + string(APPEND TARGET_NAME_BASE "-omp") + elseif(USE_PGC) + string(APPEND TARGET_NAME_BASE "-st-pgc") + else() + string(APPEND TARGET_NAME_BASE "-st") + endif() + set(TARGET_NAME "${TARGET_NAME_BASE}-${TARGET_TYPE}") + endif() + + # Construct output name: + # Note: accepts a hardcoded name from caller, otherwise + # it's the same as the target name (minus type). + set(OUTPUT_NAME_BASE) + if(NOT "${args_OUTPUT_NAME_BASE}" STREQUAL "") + set(OUTPUT_NAME_BASE "${args_OUTPUT_NAME_BASE}") + else() + string(REPLACE "-${TARGET_TYPE}" "" OUTPUT_NAME_BASE "${TARGET_NAME}") + endif() + + # For variants not supported, skip them: + if(IS_WASM AND (IS_SHAREDLIB OR USE_THREADS OR USE_OPENMP)) + return() + elseif(IS_OBJLIB AND NOT USE_OBJECT_LIBS) + return() + endif() + + # Create target: + set(INSTALL_DIR) + if(IS_OBJLIB) + list(APPEND ALL_OBJLIB_TARGETS ${TARGET_NAME}) + set(ALL_OBJLIB_TARGETS ${ALL_OBJLIB_TARGETS} PARENT_SCOPE) + + add_library(${TARGET_NAME} OBJECT ${args_SOURCE}) + elseif(IS_SHAREDLIB) + list(APPEND ALL_SHAREDLIB_TARGETS ${TARGET_NAME}) + set(ALL_SHAREDLIB_TARGETS ${ALL_SHAREDLIB_TARGETS} PARENT_SCOPE) + + if(USE_OBJECT_LIBS) + string(REPLACE "-sharedlib" "-objlib" OBJ_LIB_TARGET_NAME ${TARGET_NAME}) + add_library(${TARGET_NAME} SHARED $) + else() + add_library(${TARGET_NAME} SHARED ${args_SOURCE}) + endif() + + # On windows, add a hard dependency on the app of same type. This is needed since + # the builds are parallelized and with the artifacts being named the same between apps + # and shared libs (amalgam.exe vs amalgam.dll), there are intermediate files that get created + # for both that can get clobbered or file system read/write errors (amalgam.exp, for example). So, + # make sure they never run at the same time. 
+ if(IS_WINDOWS) + string(REPLACE "-sharedlib" "-app" APP_TARGET_NAME ${TARGET_NAME}) + add_dependencies(${TARGET_NAME} ${APP_TARGET_NAME}) + endif() + + # Set install dir: + set(INSTALL_DIR "lib") + elseif(IS_APP) + list(APPEND ALL_APP_TARGETS ${TARGET_NAME}) + set(ALL_APP_TARGETS ${ALL_APP_TARGETS} PARENT_SCOPE) + + if(USE_OBJECT_LIBS) + string(REPLACE "-app" "-objlib" OBJ_LIB_TARGET_NAME ${TARGET_NAME}) + add_executable(${TARGET_NAME} ${args_APP_ONLY_SOURCE} $) + else() + add_executable(${TARGET_NAME} ${args_APP_ONLY_SOURCE} ${args_SOURCE}) + endif() + + # Set install dir: + set(INSTALL_DIR "bin") + endif() + + + # + # Add options, properties, etc to targets: + # + + # Include dirs: + if(NOT "${args_INCLUDE_DIRS}" STREQUAL "") + target_include_directories(${TARGET_NAME} PUBLIC ${args_INCLUDE_DIRS}) + endif() + + # Compiler defines: + if(NOT "${args_COMPILER_DEFINES}" STREQUAL "") + target_compile_definitions(${TARGET_NAME} PUBLIC ${args_COMPILER_DEFINES}) + endif() + + # Link libraries: + if(NOT "${args_LINK_LIBRARIES}" STREQUAL "") + target_link_libraries(${TARGET_NAME} PUBLIC ${args_LINK_LIBRARIES}) + endif() + + # Library define symbol: + if(IS_SHAREDLIB) + set_target_properties(${TARGET_NAME} PROPERTIES DEFINE_SYMBOL "AMALGAM_LIB_EXPORTS") + endif() + + # Add file name define for resource files that use it: + # Note: objlibs don't have output targets don't add them there + if(NOT IS_OBJLIB) + target_compile_definitions(${TARGET_NAME} PUBLIC AMALGAM_FILE_NAME="$") + endif() + + # Output name base: + # Note: objlibs don't have output names + if(NOT IS_OBJLIB) + set_target_properties(${TARGET_NAME} PROPERTIES OUTPUT_NAME "${OUTPUT_NAME_BASE}") + endif() + + # Threads: + if(USE_THREADS) + target_compile_definitions(${TARGET_NAME} PUBLIC MULTITHREAD_SUPPORT) + endif() + + # Unfortunately, this is needed for all Unix targets, not just multithreaded ones, + # see: https://howardhinnant.github.io/date/tz.html#Installation + # TODO: Maybe when moving to C++20 and the official verision of the date library, this won't be needed + # for single threaded targets + # TODO: move to using official CMake Threads include when it works + if(IS_UNIX AND NOT IS_WASM) + target_compile_options(${TARGET_NAME} PUBLIC -pthread) + target_link_libraries(${TARGET_NAME} PUBLIC pthread) + endif() + + # OpenMP: + # TODO: move to using official CMake OpenMP include when it works + if(USE_OPENMP) + if(IS_MSVC) + target_compile_options(${TARGET_NAME} PUBLIC /openmp) + elseif(IS_GCC_FLAG_COMPAT_COMPILER) + target_link_libraries(${TARGET_NAME} PUBLIC -lgomp) + elseif(IS_APPLECLANG) + # Assumes libomp was installed with brew at this location + target_compile_options(${TARGET_NAME} PUBLIC -Xpreprocessor -fopenmp -I/usr/local/opt/libomp/include) + target_link_options(${TARGET_NAME} PUBLIC -L/usr/local/opt/libomp/lib) + target_link_libraries(${TARGET_NAME} PUBLIC /usr/local/opt/libomp/lib/libomp.a) + endif() + endif() + + # PGC: + if(USE_PGC) + target_compile_definitions(${TARGET_NAME} PUBLIC PEDANTIC_GARBAGE_COLLECTION) + endif() + + # Advanced arch intrinsics: + if(USE_ADVANCED_ARCH_INTRINSICS) + if (IS_AMD64) + set(INTRINSICS_FLAGS) + if(IS_MSVC) + set(INTRINSICS_FLAGS "/arch:${ADVANCED_INTRINSICS_AMD64}") + elseif(IS_GCC_FLAG_COMPAT_COMPILER) + set(INTRINSICS_FLAGS "-m${ADVANCED_INTRINSICS_AMD64}") + elseif(IS_APPLECLANG) + set(INTRINSICS_FLAGS "-march=core-${ADVANCED_INTRINSICS_AMD64}") + endif() + target_compile_options(${TARGET_NAME} PUBLIC ${INTRINSICS_FLAGS}) + endif() + endif() + + # IDE folder: + if(NOT 
"${args_IDE_FOLDER}" STREQUAL "") + set_target_properties(${TARGET_NAME} PROPERTIES FOLDER "${args_IDE_FOLDER}") + endif() + + # Install: + # Notes: + # 1) objlibs can't be installed + # 2) if not installing target, mark it as TEST_ONLY to be used for testing + if(NOT IS_OBJLIB) + if(NOT NO_INSTALL) + install(TARGETS ${TARGET_NAME} DESTINATION "${INSTALL_DIR}" PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS}) + + # Extra files to install for WASM + if(IS_WASM) + install( + FILES + "$/$.data" + "$/$.wasm" + DESTINATION "${INSTALL_DIR}" + PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS} + ) + file(MAKE_DIRECTORY "out/config") + set(WASM_DECLARATION_FILE "out/config/${TARGET_NAME_BASE}.d.cts") + file(COPY_FILE "build/wasm/amalgam-wasm.d.cts" "${WASM_DECLARATION_FILE}" ONLY_IF_DIFFERENT) + install(FILES "${WASM_DECLARATION_FILE}" DESTINATION "${INSTALL_DIR}" PERMISSIONS ${DEFAULT_INSTALL_PERMISSIONS}) + endif() + endif() + endif() + +endfunction() diff --git a/build/cmake/global_compiler_flags.cmake b/build/cmake/global_compiler_flags.cmake new file mode 100644 index 00000000..27be29c3 --- /dev/null +++ b/build/cmake/global_compiler_flags.cmake @@ -0,0 +1,131 @@ +# +# Global compiler defines & flags: +# + +# TODO: do we need this? Can it be smaller? How do we set it on all platforms? +set(DEFAULT_STACK_SIZE 67108864) + +set(IS_MSVC False) +set(IS_GCC False) +set(IS_CLANG False) +set(IS_GNU_FLAG_COMPAT_COMPILER False) +set(IS_APPLECLANG False) +if(MSVC) + + set(IS_MSVC True) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.30) + message(WARNING "MSVC version '${CMAKE_CXX_COMPILER_VERSION}' < 19.30, usage is not officially supported") + endif() + + set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") + + # Common flags: + string(APPEND CMAKE_CXX_FLAGS " /nologo /W3 /WX /MP /GS /TP /FC /fp:precise /Zc:wchar_t /Zc:forScope /Zc:inline /analyze-") + string(APPEND CMAKE_EXE_LINKER_FLAGS " /STACK:${DEFAULT_STACK_SIZE}") + + # Debug flags: + string(APPEND CMAKE_CXX_FLAGS_DEBUG " /JMC") + if(IS_VISUALSTUDIO) + # EditAndContinue only works with VS generator: + string(APPEND CMAKE_CXX_FLAGS_DEBUG " /ZI") + else() + string(APPEND CMAKE_CXX_FLAGS_DEBUG " /Zi") + endif() + + # Release flags: + string(REPLACE "/O2" "/Ox" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + string(APPEND CMAKE_CXX_FLAGS_RELEASE " /Zi /Oi /Ot /Oy /GF /GL /GT /Gy /Gd") + set(ALL_LINKER_FLAGS_RELEASE CMAKE_EXE_LINKER_FLAGS_RELEASE CMAKE_SHARED_LINKER_FLAGS_RELEASE) + foreach(FLAGS_NAME in ${ALL_LINKER_FLAGS_RELEASE}) + string(APPEND ${FLAGS_NAME} " /LTCG:incremental /OPT:REF /OPT:ICF /DEBUG:FASTLINK") + endforeach() + +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + + # Note: GCC and Clang are mostly flag-compatible so we set flags the same but allow for special handling if needed + # through IS_* vars. 
+ set(IS_GCC_FLAG_COMPAT_COMPILER True) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") + set(IS_GCC True) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 10) + message(WARNING "GCC version '${CMAKE_CXX_COMPILER_VERSION}' < 10, usage is not officially supported") + endif() + elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(IS_CLANG True) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 17) + message(WARNING "Clang version '${CMAKE_CXX_COMPILER_VERSION}' < 17, usage is not officially supported") + endif() + endif() + + string(APPEND CMAKE_CXX_FLAGS " -fPIC -fno-strict-aliasing -Wall -Wno-unknown-pragmas -Werror") + #string(APPEND CMAKE_CXX_FLAGS " -Wpedantic -Wextra -Wabi") # Additional warnings that are fairly strict, not enabled right now + + if(IS_ARM64) + # See for discussion why set: https://stackoverflow.com/questions/52020305/what-exactly-does-gccs-wpsabi-option-do-what-are-the-implications-of-supressi + string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi") + endif() + + if(IS_WASM) + string(APPEND CMAKE_CXX_FLAGS " -sMEMORY64=2 -Wno-experimental -DSIMDJSON_NO_PORTABILITY_WARNING") + string(APPEND CMAKE_EXE_LINKER_FLAGS " -sINVOKE_RUN=0 -sALLOW_MEMORY_GROWTH=1 -sINITIAL_MEMORY=65536000 -sMEMORY_GROWTH_GEOMETRIC_STEP=0.50 -sMODULARIZE=1 -sEXPORT_NAME=AmalgamRuntime -sENVIRONMENT=worker -sEXPORTED_RUNTIME_METHODS=cwrap,ccall,FS,setValue,getValue -sEXPORTED_FUNCTIONS=_malloc,_free,_LoadEntity,_StoreEntity,_ExecuteEntity,_ExecuteEntityJsonPtr,_DeleteEntity,_GetEntities,_SetRandomSeed,_SetJSONToLabel,_GetJSONPtrFromLabel,_SetSBFDataStoreEnabled,_IsSBFDataStoreEnabled,_GetVersionString,_SetMaxNumThreads,_GetMaxNumThreads --preload-file /wasm/tzdata@/tzdata --preload-file /wasm/etc@/etc") + endif() + +elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") + + set(IS_APPLECLANG True) + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13) + message(WARNING "AppleClang version '${CMAKE_CXX_COMPILER_VERSION}' < 13, usage is not officially supported") + endif() + + string(APPEND CMAKE_CXX_FLAGS " -fPIC -fno-strict-aliasing -Wall -Wno-unknown-pragmas -Werror") + #string(APPEND CMAKE_CXX_FLAGS " -Wpedantic -Wextra -Wabi") # Additional warnings that are fairly strict, not enabled right now + +else() + + message(WARNING "Unknown generator '${CMAKE_CXX_COMPILER_ID}', usage is not officially supported") + +endif() + +# Unix only: +set(ARCH_VERSION "amd64") +set(ARM64_LIB_DIR) # used by arm emulator for testing +if(IS_UNIX) + if(IS_WASM) + add_compile_definitions(USE_OS_TZDB=0 HAS_REMOTE_API=0 INSTALL=/) + else() + add_compile_definitions(USE_OS_TZDB=1) + endif() + + # Arch flag: + if(IS_AMD64) + set(ARCH_VERSION "x86-64") + elseif(IS_ARM64) + if(IS_MACOS) + set(ARCH_VERSION "armv8.4-a+simd") + elseif(IS_LINUX) + set(ARM64_LIB_DIR "/usr/aarch64-linux-gnu") + if(IS_ARM64_8A) + set(ARCH_VERSION "armv8-a+simd") + else() + set(ARCH_VERSION "armv8.2-a+simd+rcpc") + endif() + endif() + endif() + if(NOT IS_WASM) + string(APPEND CMAKE_CXX_FLAGS " -march=${ARCH_VERSION}") + endif() +endif() + +# MSVC only: +if(IS_MSVC) + add_compile_definitions(UNICODE _UNICODE) +endif() + +# amd64 advanced intrinsics: +# Note: allowed values - avx avx2 avx512 +set(ADVANCED_INTRINSICS_AMD64 "avx2") +if(IS_WINDOWS) + string(TOUPPER ${ADVANCED_INTRINSICS_AMD64} ADVANCED_INTRINSICS_AMD64) +endif() +# Used for naming built binaries without AVX: +set(NO_ADVANCED_INTRINSICS_AMD64_SUFFIX "noavx") \ No newline at end of file diff --git a/build/cmake/global_settings.cmake b/build/cmake/global_settings.cmake new file mode 100644 index 
00000000..1aa8a4b3 --- /dev/null +++ b/build/cmake/global_settings.cmake @@ -0,0 +1,110 @@ +# +# Global settings across all projects +# + +# For IDEs that support it, turn on folders: +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +# Testing: +enable_testing() + +# Remove library prefix for compatibility with callers who don't expect a lib prefix: +# TODO: evantually update callers to understand libs on platforms that typically have prefix +# Example: libamalgam.so/libamalgam.dylib/amalgam.dll +set(CMAKE_STATIC_LIBRARY_PREFIX "") +set(CMAKE_SHARED_LIBRARY_PREFIX "") + +# Generator: +set(IS_NINJA False) +set(IS_VISUALSTUDIO False) +if("${CMAKE_GENERATOR}" MATCHES "[Nn]inja") + set(IS_NINJA True) +elseif("${CMAKE_GENERATOR}" MATCHES "^Visual Studio") + set(IS_VISUALSTUDIO True) +else() + message(WARNING "Unknown generator '${CMAKE_GENERATOR}', usage is not officially supported") +endif() + +# OS: +set(OS "unknown") +set(OSv2 "unknown") +set(IS_WINDOWS False) +set(IS_UNIX False) +set(IS_LINUX False) +set(IS_MACOS False) +set(NEWLINE_STYLE "unknown") +if(WIN32) + set(OS "windows") + set(OSv2 "${OS}") + set(IS_WINDOWS True) + set(NEWLINE_STYLE "WIN32") +elseif(UNIX AND NOT APPLE) + set(OS "linux") + set(OSv2 "${OS}") + set(IS_UNIX True) + set(IS_LINUX True) + set(NEWLINE_STYLE "UNIX") +elseif(UNIX) + set(OS "macos") + set(OSv2 "darwin") + set(IS_UNIX True) + set(IS_MACOS True) + set(NEWLINE_STYLE "UNIX") +else() + message(WARNING "Unknown OS, usage is not officially supported") +endif() + +# Arch: +set(IS_AMD64 False) +set(IS_ARM64 False) +set(IS_ARM64_8A False) +set(IS_WASM False) +if("${ARCH}" MATCHES "^arm64") + set(IS_ARM64 True) + if("${ARCH}" STREQUAL "arm64_8a") + set(IS_ARM64_8A True) + endif() +elseif("${ARCH}" STREQUAL "wasm64") + set(IS_WASM True) +else() # default arch + set(IS_AMD64 True) + set(ARCH "amd64") +endif() + +# System info: +cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) +cmake_host_system_information(RESULT NUMBER_OF_PHYSICAL_CORES QUERY NUMBER_OF_PHYSICAL_CORES) +cmake_host_system_information(RESULT TOTAL_VIRTUAL_MEMORY QUERY TOTAL_VIRTUAL_MEMORY) +cmake_host_system_information(RESULT AVAILABLE_VIRTUAL_MEMORY QUERY AVAILABLE_VIRTUAL_MEMORY) +cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) +cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) + +# Install permissions: +set(DEFAULT_INSTALL_PERMISSIONS) +if(IS_WASM) + set(DEFAULT_INSTALL_PERMISSIONS + OWNER_READ OWNER_WRITE + GROUP_READ + WORLD_READ + ) +else() + set(DEFAULT_INSTALL_PERMISSIONS + OWNER_READ OWNER_WRITE OWNER_EXECUTE + GROUP_READ GROUP_EXECUTE + WORLD_READ WORLD_EXECUTE + ) +endif() + +# RPATH: +if(IS_UNIX) + set(CMAKE_SKIP_INSTALL_RPATH True) + + if(IS_WASM) + set(CMAKE_BUILD_WITH_INSTALL_RPATH True) + endif() +endif() + +# Exe ext for WASM: +if(IS_WASM) + set(CMAKE_EXECUTABLE_SUFFIX ".cjs") +endif() diff --git a/build/cmake/version.cmake b/build/cmake/version.cmake new file mode 100644 index 00000000..52966fde --- /dev/null +++ b/build/cmake/version.cmake @@ -0,0 +1,116 @@ +# +# Version parsing/setting: +# + +# Unknown version vars: +set(AMALGAM_VERSION_MAJOR_UNKNOWN 0) +set(AMALGAM_VERSION_MINOR_UNKNOWN 0) +set(AMALGAM_VERSION_PATCH_UNKNOWN 0) +set(AMALGAM_VERSION_PRERELEASE_UNKNOWN "alpha") +set(AMALGAM_VERSION_METADATA_UNKNOWN "local.dev") +set(AMALGAM_VERSION_UNKNOWN + 
"${AMALGAM_VERSION_MAJOR_UNKNOWN}.${AMALGAM_VERSION_MINOR_UNKNOWN}.${AMALGAM_VERSION_PATCH_UNKNOWN}-${AMALGAM_VERSION_PRERELEASE_UNKNOWN}+${AMALGAM_VERSION_METADATA_UNKNOWN}") + +# Version: +set(AMALGAM_VERSION_ORIG "${AMALGAM_VERSION_UNKNOWN}") +set(AMALGAM_VERSION) +set(IS_VERSION_FROM_GIT_TAG False) +if(DEFINED ENV{BUILD_BUILDNUMBER} AND NOT "$ENV{BUILD_BUILDNUMBER}" STREQUAL "") + + # TODO: remove this path after GitHub move + # Defined by Azure pipeline (Build.BuildNumber) + message(STATUS "Reading version from env var 'BUILD_BUILDNUMBER'") + set(AMALGAM_VERSION_ORIG "$ENV{BUILD_BUILDNUMBER}") + set(AMALGAM_VERSION "${AMALGAM_VERSION_ORIG}") + +elseif(DEFINED ENV{AMALGAM_BUILD_VERSION} AND NOT "$ENV{AMALGAM_BUILD_VERSION}" STREQUAL "") + + message(STATUS "Reading version from env var 'AMALGAM_BUILD_VERSION'") + set(AMALGAM_VERSION_ORIG "$ENV{AMALGAM_BUILD_VERSION}") + set(AMALGAM_VERSION "${AMALGAM_VERSION_ORIG}") + +else() + + if(TRY_GIT_TAG_FOR_UNKNOWN_VERSION) + + message(STATUS "Reading version from latest git tag") + + # Get latest git tag + find_package(Git) + if(Git_NOT_FOUND) + message(WARNING "Git not found, cannot get release tags. Defaulting to unknown version") + else() + execute_process(COMMAND + "${GIT_EXECUTABLE}" describe --abbrev=0 --tags + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE GIT_RETURN_CODE + OUTPUT_VARIABLE GIT_TAG_LATEST + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(GIT_RETURN_CODE AND NOT GIT_RETURN_CODE EQUAL 0) + message(WARNING "Git command failed, cannot get release tags. Defaulting to unknown version") + else() + set(AMALGAM_VERSION_ORIG ${GIT_TAG_LATEST}) + string(REPLACE "version-" "" GIT_TAG_LATEST "${GIT_TAG_LATEST}") + string(REGEX REPLACE "^v" "" GIT_TAG_LATEST "${GIT_TAG_LATEST}") + set(AMALGAM_VERSION ${GIT_TAG_LATEST}) + set(IS_VERSION_FROM_GIT_TAG True) + endif() + endif() + + else() + message(STATUS "No version given. Defaulting to unknown version") + endif() + +endif() + +# Parse version number: +string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)\\-*([^\\+]*)\\+*(.*)" VERSION_MATCH "${AMALGAM_VERSION}") +set(AMALGAM_VERSION_MAJOR ${CMAKE_MATCH_1}) +set(AMALGAM_VERSION_MINOR ${CMAKE_MATCH_2}) +set(AMALGAM_VERSION_PATCH ${CMAKE_MATCH_3}) +set(AMALGAM_VERSION_PRERELEASE ${CMAKE_MATCH_4}) +set(AMALGAM_VERSION_METADATA ${CMAKE_MATCH_5}) + +# If version couldn't be parsed, warn and set to unknown: +if("${AMALGAM_VERSION_MAJOR}" STREQUAL "" OR "${AMALGAM_VERSION_MINOR}" STREQUAL "" OR "${AMALGAM_VERSION_PATCH}" STREQUAL "") + message(WARNING "Version number could not be parsed. Defaulting to unknown version") + set(AMALGAM_VERSION "${AMALGAM_VERSION_UNKNOWN}") + set(AMALGAM_VERSION_MAJOR ${AMALGAM_VERSION_MAJOR_UNKNOWN}) + set(AMALGAM_VERSION_MINOR ${AMALGAM_VERSION_MINOR_UNKNOWN}) + set(AMALGAM_VERSION_PATCH ${AMALGAM_VERSION_PATCH_UNKNOWN}) + set(AMALGAM_VERSION_PRERELEASE "${AMALGAM_VERSION_PRERELEASE_UNKNOWN}") + set(AMALGAM_VERSION_METADATA "${AMALGAM_VERSION_METADATA_UNKNOWN}") +else() + + # Alter version slightly if we read it from a git tag: + # Note: this is so the semver is always consistent, even between local and automated + # builds. If the version is not given to us in the env, we use the last + # annotated git tag, increment its patch, and assign new prerelease + # and metadata as if it was an unknown version (lowest precedence). 
+ if(IS_VERSION_FROM_GIT_TAG) + math(EXPR AMALGAM_VERSION_PATCH "${AMALGAM_VERSION_PATCH}+1") + set(AMALGAM_VERSION_PRERELEASE "${AMALGAM_VERSION_PRERELEASE_UNKNOWN}") + set(AMALGAM_VERSION_METADATA "${AMALGAM_VERSION_METADATA_UNKNOWN}") + endif() + +endif() + +set(AMALGAM_VERSION_SUFFIX) +if(NOT "${AMALGAM_VERSION_PRERELEASE}" STREQUAL "") + string(APPEND AMALGAM_VERSION_SUFFIX "-${AMALGAM_VERSION_PRERELEASE}") +endif() +if(NOT "${AMALGAM_VERSION_METADATA}" STREQUAL "") + string(APPEND AMALGAM_VERSION_SUFFIX "+${AMALGAM_VERSION_METADATA}") +endif() +set(AMALGAM_VERSION_BASE "${AMALGAM_VERSION_MAJOR}.${AMALGAM_VERSION_MINOR}.${AMALGAM_VERSION_PATCH}") +set(AMALGAM_VERSION_FULL "${AMALGAM_VERSION_BASE}${AMALGAM_VERSION_SUFFIX}") +string(REPLACE "+" "\\+" AMALGAM_VERSION_FULL_ESCAPED ${AMALGAM_VERSION_FULL}) + +# Write version header: +configure_file( + "${CMAKE_SOURCE_DIR}/build/cmake/configure_files/AmalgamVersion.h.in" + "${CMAKE_SOURCE_DIR}/src/Amalgam/AmalgamVersion.h" + NEWLINE_STYLE ${NEWLINE_STYLE} + @ONLY +) diff --git a/build/powershell/Create-Amalgam-Version-Header.ps1 b/build/powershell/Create-Amalgam-Version-Header.ps1 new file mode 100644 index 00000000..5e39bc81 --- /dev/null +++ b/build/powershell/Create-Amalgam-Version-Header.ps1 @@ -0,0 +1,39 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Creates the Amalgam version header + +.DESCRIPTION + This script creates the Amalgam version header. It is only needed for legacy + builds (i.e., legacy VS solutions not generated by CMake) and used as a + prebuild step. This is not needed when using the CMake build system. + +.EXAMPLE + Create-Amalgam-Version-Header.ps1 +#> + +# Stop if errors: +$ErrorActionPreference="Stop" + +# Run: +function Create-Amalgam-Version-Header { + + [CmdletBinding()] + param() + + # Read and replace version numbers: + $HeaderFileIn = "$PSScriptRoot/../cmake/configure_files/AmalgamVersion.h.in" + $NewHeaderContents = Get-Content($HeaderFileIn) + $NewHeaderContents = $NewHeaderContents.replace("@AMALGAM_VERSION_MAJOR@","0") + $NewHeaderContents = $NewHeaderContents.replace("@AMALGAM_VERSION_MINOR@","0") + $NewHeaderContents = $NewHeaderContents.replace("@AMALGAM_VERSION_PATCH@","0") + $NewHeaderContents = $NewHeaderContents.replace("@AMALGAM_VERSION_SUFFIX@","") + + # Write new header: + $HeaderFileOut = "$PSScriptRoot/../../src/Amalgam/AmalgamVersion.h" + Set-Content -Path $HeaderFileOut -Value $NewHeaderContents +} + +Create-Amalgam-Version-Header + +exit 0 \ No newline at end of file diff --git a/build/powershell/Download-Tzdata.ps1 b/build/powershell/Download-Tzdata.ps1 new file mode 100644 index 00000000..e6b866fd --- /dev/null +++ b/build/powershell/Download-Tzdata.ps1 @@ -0,0 +1,54 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Download tzdata + +.DESCRIPTION + This script downloads the latest tzdata and places it in $Path + +.EXAMPLE + Download-Tzdata.ps1 +#> + +# Source helper functions: +. 
$PSScriptRoot/Helper-Functions.ps1 + +# Stop if errors: +$ErrorActionPreference="Stop" + +# Build all: +function Download-Tzdata { + + [CmdletBinding()] + param( + [Parameter()] + [string]$Path = "$HOME/.howso" + ) + + $OS = Get-OS + Write-Host "OS: $OS" + + $TzDataPath = "$Path/tzdata" + if(-not (Test-Path "$TzDataPath")) { + New-Item -ItemType Directory -Force -Path "$TzDataPath" | Out-Null + $LocalTarGz = "$Path/tzdata.tar.gz" + $ProgressPreference = 'SilentlyContinue' + Invoke-WebRequest -Uri "https://data.iana.org/time-zones/releases/tzdata2023c.tar.gz" -Outfile "$LocalTarGz" + $ProgressPreference = 'Continue' + Invoke-NativeCommand -Cmd "tar" -Arguments @("-xzf", "$LocalTarGz", "-C", "$TzDataPath") + Remove-Item -Path "$LocalTarGz" + Write-Host "tzdata written to: $TzDataPath" + + if ($OS.equals("windows")) { + $TimeZoneFile = "$TzDataPath/windowsZones.xml" + Invoke-WebRequest -Uri "https://raw.githubusercontent.com/unicode-org/cldr/main/common/supplemental/windowsZones.xml" -Outfile "$TimeZoneFile" + Write-Host "Windows time zones written to: $TimeZoneFile" + } + } else { + Write-Host "tzdata already exists, nothing to do" + } +} + +Download-Tzdata @args + +exit 0 \ No newline at end of file diff --git a/build/powershell/Fixup-Generated-VisualStudio-Projects.ps1 b/build/powershell/Fixup-Generated-VisualStudio-Projects.ps1 new file mode 100644 index 00000000..3d881f5e --- /dev/null +++ b/build/powershell/Fixup-Generated-VisualStudio-Projects.ps1 @@ -0,0 +1,77 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Fixup the CMake generated VisualStudio project files + +.DESCRIPTION + This script edits all of the CMake generated VisualStudio project files with + updates that are not available through CMake. + + The first case is because CMake itself does not allow for setting config-specific + settings on a PropertyGroup (set_property for VS_GLOBAL settings does not accept + generator expressions). Therefore, there is no way to set those settings in CMake. + + The second case is issues where complier/linker flags should set the right behavior + but for some reason... don't. This could be because of a CMake bug or user + error when we set the flags (setting opposing flags possibly). + + Both of these cases are issues and are currently handled in this script post CMake + generate. + +.EXAMPLE + Fixup-Generated-VisualStudio-Projects.ps1 +#> + +# Source helper functions: +. $PSScriptRoot/Helper-Functions.ps1 + +# Stop if errors: +$ErrorActionPreference="Stop" + +function Fixup-Generated-VisualStudio-Projects { + + [CmdletBinding()] + param( + [Parameter()] + [string]$Preset = "amd64-windows-vs" + ) + + $OS = Get-OS + Write-Host "OS: $OS" + + # VS generator only supported on windows: + if($OS.equals("windows")) { + + $nl = [Environment]::NewLine + Write-Host "Gathering VS project files..." 
+ $ProjectFiles = (Get-ChildItem "out/build/$Preset/amalgam-*.vcxproj") | Select-Object -ExpandProperty FullName | Out-String -Stream | Select-String -Pattern "(app|sharedlib|objlib)" + foreach($ProjFile in $ProjectFiles) { + + # Read proj in as one string for simple replacements: + $ProjFileContents = Get-Content -Path $ProjFile -Encoding UTF8 -Raw + + # Properties in "PropertyGroup" for a specific config (debug, release) cannot be set through CMake: + $ProjFileContents = $ProjFileContents -ireplace "($nl)", "`$1 true$nl" + $ProjFileContents = $ProjFileContents -ireplace "($nl)", "`$1 true$nl" + + # Properties in "ProjectReference" can't be set through CMake: + $ProjFileContents = $ProjFileContents -ireplace '()(false)', '$1true' + + # For some reason, setting /ZI for debug builds does not correctly set EditAndContinue. It is unknown why this + # true, possibly a CMake bug, VS bug in reading order of options, or we incorrectly set the compiler/linker options. + $re = [regex]'ProgramDatabase' + $ProjFileContents = $re.Replace($ProjFileContents, 'EditAndContinue', 1) + + # Write file back out: + Set-Content -Path $ProjFile -Value $ProjFileContents -Encoding UTF8 + } + + Write-Host "Fix-up completed, edited $($ProjectFiles.length) files" + } else { + Write-Host "Visual Studio generation (and fixup) only supported on windows, nothing to do" + } +} + +Fixup-Generated-VisualStudio-Projects @args + +exit 0 \ No newline at end of file diff --git a/build/powershell/Helper-Functions.ps1 b/build/powershell/Helper-Functions.ps1 new file mode 100644 index 00000000..a8d57045 --- /dev/null +++ b/build/powershell/Helper-Functions.ps1 @@ -0,0 +1,68 @@ +<# +.SYNOPSIS + Helper functions + +.DESCRIPTION + Helper functions for OS and native command handling +#> + +# Stop if errors: +$ErrorActionPreference="Stop" + +# Get the OS: +function Get-OS { + $OSCheck = [Environment]::OSVersion.VersionString + $OS = "unknown" + if ($IsLinux) { + $OS = "linux" + } + elseif ($IsMacOS) { + $OS = "macos" + } + elseif ($OSCheck.Contains("Windows")) { + $OS = "windows" + } + + return $OS +} + +# Run a native command: +function Invoke-NativeCommand { + param( + [Parameter(Mandatory=$true)] + [string] $Cmd, + [Parameter(Mandatory=$true)] + [string[]] $Arguments + ) + + $StartTime = $(get-date) + Write-Host "Executing: '$Cmd $Arguments'" + & $Cmd @Arguments + $ExitCode = $LastExitCode + $ElapsedTime = $(get-date) - $StartTime + Write-Host "Wall clock runtime: $ElapsedTime" + + if ($ExitCode -ne 0) { + Write-Error "Exit code $LastExitCode while running $Cmd $Arguments" + } +} + +# Verify supported build variant: +function Verify-Build { + param( + [Parameter(Mandatory=$true)] + [string] $OS, + [Parameter(Mandatory=$true)] + [string] $Arch + ) + + if ($Arch.equals("arm64")) { + if ($OS.equals("windows")) { + Write-Error "arm64 only supported on linux & macos" + } + } elseif ($Arch.equals("wasm64")) { + if (!$OS.equals("linux")) { + Write-Error "wasm64 only supported on linux" + } + } +} \ No newline at end of file diff --git a/build/powershell/SetUp-MacOs-Pipeline-Tools.ps1 b/build/powershell/SetUp-MacOs-Pipeline-Tools.ps1 new file mode 100644 index 00000000..df96e172 --- /dev/null +++ b/build/powershell/SetUp-MacOs-Pipeline-Tools.ps1 @@ -0,0 +1,59 @@ +#!/usr/bin/env pwsh +<# +.SYNOPSIS + Setup pipeline tools for macos + +.DESCRIPTION + This script sets up the tools needed for pipeline builds not covered by + the build machines/containers on macos. + +.PARAMETER Arch + Specifies the arch for the current platform. 
+ +.EXAMPLE + SetUp-MacOs-Pipeline-Tools.ps1 -Arch amd64 +#> + +# Source helper functions: +. $PSScriptRoot/Helper-Functions.ps1 + +# Stop if errors: +$ErrorActionPreference="Stop" + +# Build all: +function SetUp-MacOs-Pipeline-Tools { + + [CmdletBinding()] + param( + [Parameter()] + [string]$Arch = "amd64" + ) + + $OS = Get-OS + Write-Host "OS: $OS" + + if ($OS.equals("macos")) { + + # Hack to get the arm64 lib downloaded for cross compile + if ($Arch.equals("arm64")) { + Invoke-NativeCommand -Cmd "brew" -Arguments @("uninstall", "--ignore-dependencies", "libomp") + Invoke-NativeCommand -Cmd "brew" -Arguments @("cleanup", "-s") + Invoke-NativeCommand -Cmd "rm" -Arguments @("-rf", "$(brew --cache)") + Invoke-NativeCommand -Cmd "brew" -Arguments @("fetch", "--force", "--bottle-tag=arm64_big_sur", "libomp") + Invoke-NativeCommand -Cmd "brew" -Arguments @("install", "$(brew --cache --bottle-tag=arm64_big_sur libomp)") + } else { + Invoke-NativeCommand -Cmd "brew" -Arguments @("install", "libomp") + } + Invoke-NativeCommand -Cmd "brew" -Arguments @("install", "ninja") + + Invoke-NativeCommand -Cmd "brew" -Arguments @("list", "--versions", "libomp") + Invoke-NativeCommand -Cmd "ninja" -Arguments @("--version") + + } else { + Write-Host "Not macos, nothing to do" + } +} + +SetUp-MacOs-Pipeline-Tools @args + +exit 0 \ No newline at end of file diff --git a/build/wasm/amalgam-wasm.d.cts b/build/wasm/amalgam-wasm.d.cts new file mode 100644 index 00000000..a2bcf1a0 --- /dev/null +++ b/build/wasm/amalgam-wasm.d.cts @@ -0,0 +1,3 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +export = AmalgamRuntime; +declare function AmalgamRuntime(overrides?: Partial): Promise; diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/docs/icon/amalgam.ico b/docs/icon/amalgam.ico new file mode 100644 index 00000000..f3fbe329 Binary files /dev/null and b/docs/icon/amalgam.ico differ diff --git a/docs/icon/amalgam_icon_128.png b/docs/icon/amalgam_icon_128.png new file mode 100644 index 00000000..e6b1b2b5 Binary files /dev/null and b/docs/icon/amalgam_icon_128.png differ diff --git a/docs/icon/amalgam_icon_16.png b/docs/icon/amalgam_icon_16.png new file mode 100644 index 00000000..eef87c3e Binary files /dev/null and b/docs/icon/amalgam_icon_16.png differ diff --git a/docs/icon/amalgam_icon_256.png b/docs/icon/amalgam_icon_256.png new file mode 100644 index 00000000..8bba67ea Binary files /dev/null and b/docs/icon/amalgam_icon_256.png differ diff --git a/docs/icon/amalgam_icon_32.png b/docs/icon/amalgam_icon_32.png new file mode 100644 index 00000000..f77af8cd Binary files /dev/null and b/docs/icon/amalgam_icon_32.png differ diff --git a/docs/icon/amalgam_icon_64.png b/docs/icon/amalgam_icon_64.png new file mode 100644 index 00000000..2ec8f8cc Binary files /dev/null and b/docs/icon/amalgam_icon_64.png differ diff --git a/docs/icon/make_icon.sh b/docs/icon/make_icon.sh new file mode 100644 index 00000000..3ce09094 --- /dev/null +++ b/docs/icon/make_icon.sh @@ -0,0 +1,7 @@ +#!/bin/sh +# +# Create amalgam icon +# Tool: https://imagemagick.org/script/convert.php +# + +convert amalgam_icon_16.png amalgam_icon_32.png amalgam_icon_64.png amalgam_icon_128.png amalgam_icon_256.png amalgam.ico diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 00000000..c6081281 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,406 @@ + + + + +Amalgam Documentation + + + + + + + + + + + + +
Amalgam Language Reference
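The reference page above is driven by the data array defined in docs/language.js (added next in this diff), whose entry schema is sketched in that file's leading comment: parameter, output, optional permissions and "new value", description, and example. As a rough, hypothetical illustration of how such an array could be turned into the reference table, the helper below builds one row per opcode entry; it is only a sketch under that schema assumption, not the actual code shipped in docs/index.html, and the function name is made up.

// Hypothetical helper (not part of docs/index.html): render the `data` array
// from docs/language.js as a plain HTML table, one row per opcode entry.
// Field names follow the schema comment at the top of language.js.
function renderOpcodeTable(entries) {
  const table = document.createElement("table");
  for (const entry of entries) {
    const row = table.insertRow();
    row.insertCell().textContent = entry["parameter"];          // opcode and its parameters
    row.insertCell().textContent = entry["output"] ?? "";        // result type
    row.insertCell().textContent = entry["permissions"] ?? "";   // optional permissions column
    row.insertCell().textContent = entry["new value"] ?? "";     // optional new-value column
    row.insertCell().textContent = entry["description"];         // prose description
    const example = document.createElement("code");              // example shown as code
    example.textContent = entry["example"] ?? "";
    row.insertCell().appendChild(example);
  }
  return table;
}

// Usage sketch, assuming the `data` array from language.js is already in scope:
// document.body.appendChild(renderOpcodeTable(data));

A data-driven page like this keeps the opcode documentation in one place (language.js) so the table layout and the reference content can evolve independently.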
+ + + + + + + + \ No newline at end of file diff --git a/docs/language.js b/docs/language.js new file mode 100644 index 00000000..4492524d --- /dev/null +++ b/docs/language.js @@ -0,0 +1,1729 @@ + +var data = [ +/* + { + "parameter" : "Token params column text goes here", + "output" : "Output column text goes here", + "permissions" : "P column text", //optional, one of: entity, root_entity, e = entity, r = root_entity + "new value" : "N column text", //optional, one of: new, conditional, + = new, c = conditional + "description" : "Description column text goes here", + "example" : "" //insert this everywhere but leave it blank like so + }, +*/ + { + "parameter" : "system string command", + "output" : "*", + "permissions" : "r", + "new value" : "new", + "description" : "Executes system command specified by command. See system commands in later table.", + "example" : "(system \"exit\")" + }, + + { + "parameter" : "get_defaults string value_type", + "output" : "*", + "description" : "Retrieves the default values of the named field, either \"mutation_opcodes\" or \"mutation_types\"", + "example" : "(get_defaults mutation_opcodes)" + }, + + { + "parameter" : "parse string str", + "output" : "code", + "new value" : "new", + "description" : "String is parsed into code, and the result is returned.", + "example" : "(parse \"(list 1 2 3 4 5)\")" + }, + + { + "parameter" : "unparse code c [bool pretty_print] [bool sort_keys]", + "output" : "string", + "new value" : "new", + "description" : "Code is unparsed and the representative string is returned. If the pretty-print boolean is passed as true, output will be in pretty-print format, otherwise by default it will be inlined. If sort_keys is true, then in will print assoc structures and anything that could come in different orders in a natural sorted order by key, otherwise it will default to whatever order it is stored in memory.", + "example" : "(unparse (lambda (+ 4 3)) (true))" + }, + + { + "parameter" : "if [bool condition1] [code then1] [bool condition2] [code then2] ... [bool conditionN] [code thenN] [code else]", + "output" : "*", + "description" : "If the condition1 bool is true, then it will evaluate to the then1 argument. Otherwise condition2 will be checked, repeating for every pair. If there is an odd number of parameters, the last is the final 'else', and will be evaluated as that if all conditions are false.", + "example" : "(if (null) (print \"nothing\") 0 (print \"nothing\") (print \"hello\") )" + }, + + { + "parameter" : "seq [code c1] [code c2] ... [code cN]", + "output" : "*", + "description" : "Runs each code block sequentially. Evaluates to the result of the last code block run, unless it encounters a conclude, in which case it will halt processing and evaluate to the value returned by conclude.", + "example" : "(seq (print 1) (print 2) (print 3))" + }, + + { + "parameter" : "parallel [code c1] [code c2] ... [code cN]", + "output" : "null", + "concurrency" : true, + "description" : "Runs each code block, possibly in any order. Evaluates to null", + "example" : "(parallel (assign (assoc foo 1)) (assign (assoc bar 2)))" + }, + + { + "parameter" : "lambda * function [bool evaluate_and_wrap]", + "output" : "*", + "description" : "Evaluates to the code specified without evaluating it. Useful for referencing functions or handling data without evaluating it. 
The parameter evaluate_and_wrap defaults to false, but if it is true, it will evaluate the function, but then return the result wrapped in a lambda opcode.", + "example" : "(declare (assoc foo (lambda\n (declare (assoc x 6)\n (+ x 2)\n)))" + }, + + { + "parameter" : "conclude * conclusion", + "output" : "*", + "description" : "Evaluates to the conclusion wrapped in a conclude opcode. If a step in a seq, while, let, or declare evaluates to a conclude in evaluating its evaluation, then it will conclude the execution and evaluate to conclusion.", + "example" : "(print (seq (print \"seq1 \") (conclude \"success\") (print \"seq2\") ) )" + }, + + { + "parameter" : "call * function assoc arguments", + "output" : "*", + "new scope" : true, + "description" : "Evaluates the code after pushing the arguments assoc onto the scope stack.", + "example" : "(call foo (assoc x 3))" + }, + + { + "parameter" : "call_sandboxed * function assoc arguments [number operation_limit] [number max_node_allocations]", + "output" : "*", + "new scope" : true, + "description" : "Evaluates the code specified by *, isolating it from everything except for arguments, which is used as a single layer of the scope stack. This is useful when evaluating code passed by other entities that may or may not be trusted. Opcodes run from within call_sandboxed that require any form of permissions will not perform any action and will evaluate to null. If operation_limit is specified, it represents the number of operations that are allowed to be performed. If operation_limit is 0 or infinite, then an infinite of operations will be allotted, up to the limits of the current calling context. If max_node_allocations is specified, it represents the maximum number of nodes that are allowed to be allocated, limiting the total memory, up to the current calling context's limit. If max_node_allocations is 0 or infinite and the caller also has no limit, then there is no limit to the number of nodes to be allotted as long as the machine has sufficient memory. Note that if max_node_allocations is specified while call_sandboxed is being called in a multithreaded environment, if the collective memory from all the related threads exceeds the average memory specified by call_sandboxed, that may trigger a memory limit for the call_sandboxed.", + "example" : ";x will be null because it cannot be accessed\n(call_sandboxed (lambda (+ y x 4)) (assoc y 3))" + }, + + { + "parameter" : "while bool condition [code c1] [code c2] ... [code cN]", + "output" : "*", + "description" : "Each time the condition evaluates to true, it runs each of the code trees sequentially, looping. Evaluates to the last codeN or null if the condition was initially false or if it encounters a conclude, it will halt processing and evaluate to the value returned by conclude.", + "example" : "(let (assoc zz 1)\n (while (< zz 10)\n (print zz)\n (assign (assoc zz (+ zz 1)))\n )\n)" + }, + + { + "parameter" : "let assoc data [code function1] [code function2] ... [code functionN]", + "output" : "*", + "new scope" : true, + "description" : "Pushes the key-value pairs of data onto the scope stack so that they become the new variables, then runs each code block sequentially, evaluating to the last code block run, unless it encounters a conclude, in which case it will halt processing and evaluate to the value returned by conclude.", + "example" : "(let (assoc x 4 y 6) (print (+ x y)))" + }, + + { + "parameter" : "declare assoc data [code function1] [code function2] ... 
[code functionN]", + "output" : "*", + "description" : "For each key-value pair of data, if not already in the current context in the scope stack, it will define them. Then runs each code block sequentially, evaluating to the last code block run, unless it encounters a conclude, in which case it will halt processing and evaluate to the value returned by conclude.", + "example" : "(let (assoc x 4 y 6)\n (declare (assoc x 5 z 1)\n (print (+ x y z)) )\n)" + }, + + { + "parameter" : "assign assoc data|string variable_name [number index1|string index1|list walk_path1|* new_value1] [* new_value1] [number index2|string index2|list walk_path2] [* new_value2] ...", + "output" : "null", + "description" : "If the assoc data is specified, then for each key-value pair of data, assigns the value to the variable represented by the key found by tracing upward on the stack. If none found, it will create a variable on the top of the stack. If the string variable_name is specified, then it will find the variable by tracing up the stack and then use each pair of walk_path and new_value to assign new_value to that part of the variable's structure. If there are only two parameters, then it will assign the second parameter to the variable represented by the first.", + "example" : "(print (assign (assoc x 10)))\n(print x)\n(print (assign \"x\" 10)" + }, + + { + "parameter" : "accum assoc data|string variable_name [number index1|string index1|list walk_path1] [* accum_value1] [number index2|string index2|list walk_path2] [* accum_value2] ...", + "output" : "null", + "description" : "If the assoc data is specified, then for each key-value pair of data, assigns the value of the pair accumulated with the current value of the variable represented by the key on the stack, and stores the sum in the variable. It searches for the variable name tracing up the stack to find the variable. If none found, it will create a variable on the top of the stack. Accumulation is performed differently based on the type: for numeric values it adds, for strings, it concatenates, for lists it appends, and for assocs it appends based on the pair. If the string variable_name is specified, then it will find the variable by tracing up the stack and then use each pair of walk_path and new_value to accum accum_value to that part of the variable's structure. If there are only two parameters, then it will accum the second parameter to the variable represented by the first.", + "example" : "(print (assign (assoc x 10)))\n(print x)\n(print (accum (assoc x 1)))\n(print x)" + }, + + { + "parameter" : "retrieve [string variable_name|list variable_names|assoc indexset]", + "output" : "*", + "description" : "If string specified, gets the value on the stack specified by the string. If list specified, returns a list of the values on the stack specified by each element of the list interpreted as a string. If assoc specified, returns an assoc with the indices of the assoc which was passed in with the values being the appropriate values on the stack for each index.", + "example" : "(retrieve \"my_variable\")\n(assign (assoc rwww 1 raaa 2))\n(print (retrieve \"rwww\"))\n(print (retrieve (list \"rwww\" \"raaa\")))\n(print (retrieve (zip (list \"rwww\" \"raaa\") null)))\n" + }, + + { + "parameter" : "+ [number x1] [number x2] ... [number xN]", + "output" : "number", + "new value" : "new", + "concurrency" : true, + "description" : "Sums all numbers.", + "example" : "(print (+ 1 2 3 4))" + }, + + { + "parameter" : "- [number x1] [number x2] ... 
[number xN]", + "output" : "number", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to x1 - x2 - ... - xN. If only one parameter is passed, then it is treated as negative", + "example" : "(print (- 1 2 3 4))" + }, + + { + "parameter" : "* [number x1] [number x2] ... [number xN]", + "output" : "number", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to the product of all numbers.", + "example" : "(print (* 1 2 3 4))" + }, + + { + "parameter" : "/ [number x1] [number x2] ... [number xN]", + "output" : "number", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to x1 / x2 / ... / xN.", + "example" : "(print (/ 1.0 2 3 4))" + }, + + { + "parameter" : "mod [number x1] [number x2] ... [number xN]", + "output" : "number", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates the modulus of x1 % x2 % ... % xN.", + "example" : "(print (mod 1 2 3 4))" + }, + + { + "parameter" : "get_digits number value [number base] [number start_digit] [number end_digit] [bool relative_to_zero]", + "output" : "list of number", + "new value" : "new", + "description" : "Evaluates to a list of the number of each digit of value for the given base. If base is omitted, 10 is the default. The parameters start_digit and end_digit can be used to get a specific set of digits, but can also be infinite or null to catch all the digits on one side of the number. The interpretation of start_digit and end_digit are with respect to relative_to_zero, which defaults to true. If relative_to_zero is true, then the digits are indexed from their distance to zero, such as \"5 4 3 2 1 0 . -1 -2\". If relative_to_zero is false, then the digits are indexed from their most significant digit, such as \"0 1 2 3 4 5 . 6 7\". The default values of start_digit and end_digit are the most and least significant digits respectively.", + "example" : "(print (get_digits 16 8 .infinity 0))\n(print (get_digits 3 2 5 0))\n(print (get_digits 1.5 1.5 .infinity 0))" + }, + + { + "parameter" : "set_digits number value [number base] [list of number digits] [number start_digit] [number end_digit] [bool relative_to_zero]", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to a number having each of the values in the list of digits replace each of the relative digits in value for the given base. If base is omitted, 10 is the default. The parameters start_digit and end_digit can be used to get a specific set of digits, but can also be infinite or null to catch all the digits on one side of the number. The interpretation of start_digit and end_digit are with respect to relative_to_zero, which defaults to true. If relative_to_zero is true, then the digits are indexed from their distance to zero, such as \"5 4 3 2 1 0 . -1 -2\". If relative_to_zero is false, then the digits are indexed from their most significant digit, such as \"0 1 2 3 4 5 . 6 7\". 
The default values of start_digit and end_digit are the most and least significant digits respectively.", + "example" : "(print (set_digits 16 8 (list 1 1)))\n(print (get_digits (set_digits 1234567.8 10 (list 1 0 1 0) 2 5 (false)) 10 2 5 (false)))" + }, + + { + "parameter" : "floor number x", + "output" : "int", + "new value" : "new", + "description" : "Evaluates to the mathematical floor of x.", + "example" : "(print (floor 1.5))" + }, + + { + "parameter" : "ceil number x", + "output" : "int", + "new value" : "new", + "description" : "Evaluates to the mathematical ceiling of x.", + "example" : "(print (ceil 1.5))" + }, + + { + "parameter" : "round number x [number significant_digits] [number significant_digits_after_decimal]", + "output" : "int", + "new value" : "new", + "description" : "Rounds the value x and evaluates to the new value. If only one parameter is specified, it rounds to the nearest integer. If significant_digits is specified, then it rounds to the specified number of significant digits. If significant_digits_after_decimal is specified, then it ensures that x will be rounded at least to the number of decimal points past the integer as specified, and takes priority over the significant_digits.", + "example" : "(print (round 12.7) \"\\n\")\n(print (round 12.7 1) \"\\n\")\n(print (round 123.45678 5) \"\\n\")\n(print (round 123.45678 2) \"\\n\")\n(print (round 123.45678 2 2) \"\\n\")" + }, + + { + "parameter" : "exp number x", + "output" : "number", + "new value" : "new", + "description" : "e^x", + "example" : "(print (exp 0.5))" + }, + + { + "parameter" : "log number x [number base]", + "output" : "number", + "new value" : "new", + "description" : "Log of x. If a base is specified, uses that base, otherwise defaults to natural log.", + "example" : "(print (log 0.5))" + }, + + { + "parameter" : "sin number theta", + "output" : "number", + "new value" : "new", + "description" : "sine", + "example" : "(print (sin 0.5))" + }, + + { + "parameter" : "cos number theta", + "output" : "number", + "new value" : "new", + "description" : "cosine", + "example" : "(print (cos 0.5))" + }, + + { + "parameter" : "acos number theta", + "output" : "number", + "new value" : "new", + "description" : "inverse cosine", + "example" : "(print (acos 0.5))" + }, + + { + "parameter" : "tan number theta", + "output" : "number", + "new value" : "new", + "description" : "tangent", + "example" : "(print (tan 0.5))" + }, + + { + "parameter" : "atan number theta [number divisor]", + "output" : "number", + "new value" : "new", + "description" : "Inverse tangent. 
If two numbers are provided, then it evaluates atan theta/divisor.", + "example" : "(print (atan 0.5))\n(print (atan 0.5 0.5))" + }, + + { + "parameter" : "sinh number theta", + "output" : "number", + "new value" : "new", + "description" : "hyperbolic sine", + "example" : "(print (sinh 0.5))" + }, + + { + "parameter" : "asinh number theta", + "output" : "number", + "new value" : "new", + "description" : "area hyperbolic sine", + "example" : "(print (asinh 0.5))" + }, + + { + "parameter" : "cosh number theta", + "output" : "number", + "new value" : "new", + "description" : "hyperbolic cosine", + "example" : "(print (cosh 0.5))" + }, + + { + "parameter" : "acosh number theta", + "output" : "number", + "new value" : "new", + "description" : "area hyperbolic cosine", + "example" : "(print (acosh 0.5))" + }, + + { + "parameter" : "tanh number theta", + "output" : "number", + "new value" : "new", + "description" : "hyperbolic tangent", + "example" : "(print (tanh 0.5))" + }, + + { + "parameter" : "atanh number theta", + "output" : "number", + "new value" : "new", + "description" : "area hyperbolic tanh", + "example" : "(print (atanh 0.5))" + }, + + { + "parameter" : "erf number errno", + "output" : "number", + "new value" : "new", + "description" : "error function", + "example" : "(print (erf 0.5))" + }, + + { + "parameter" : "tgamma number z", + "output" : "number", + "new value" : "new", + "description" : "true (complete) gamma function", + "example" : "(print (tgamma 0.5))" + }, + + { + "parameter" : "lgamma number z", + "output" : "number", + "new value" : "new", + "description" : "log-gamma function", + "example" : "(print (l-gamma 0.5))" + }, + + { + "parameter" : "sqrt number x", + "output" : "number", + "new value" : "new", + "description" : "Returns the square root of x.", + "example" : "(print (sqrt 0.5))" + }, + + { + "parameter" : "pow number base number exponent", + "output" : "number", + "new value" : "new", + "description" : "Returns the base raised to the exponent", + "example" : "(print (pow 0.5 2))" + }, + + { + "parameter" : "abs number x", + "output" : "number", + "new value" : "new", + "description" : "absolute value of x", + "example" : "(print (abs -0.5))" + }, + + { + "parameter" : "max [number x1] [number x2] ... [number xN]", + "output" : "number", + "concurrency" : true, + "description" : "maximum of all of the numbers", + "example" : "(print (max 0.5 1 7 9 -5))" + }, + + { + "parameter" : "min [number x1] [number x2] ... [number xN]", + "output" : "number", + "concurrency" : true, + "description" : "minimum of all of the numbers", + "example" : "(print (min 0.5 1 7 9 -5))" + }, + + { + "parameter" : "dot_product list|assoc x1 list|assoc x2", + "output" : "number", + "description" : "Evaluates to the sum of all element-wise products of x1 and x2.", + "example" : "(print (dot_product (list 0.5 0.25 0.25) (list 4 8 8)))" + }, + + { + "parameter" : "generalized_distance list|assoc|number weights list|assoc distance_types list|assoc attributes list|assoc|number deviations number p_value list|assoc|* vector1 [list|assoc|* vector2] [list value_names]", + "output" : "number", + "description" : "Computes the generalized norm between vector1 and vector2 (or an equivalent zero vector if unspecified) with parameter specified by the p_value (2 being Euclidian distance), using the numerical distance or edit distance as appropriate. 
The parameter value_names, if specified as a list of the names of the values, will transform via unzipping any assoc into a list for the respective parameter in the order of the value_names, or if a number will use the number repeatedly for every element. weights is a list of dimension weights to use for the query, each value mapping to its respective element in the vectors. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations is a list of numbers that are used during distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. If any vector value is null or evaluates to nan, or any of the differences between vector1 and vector2 evaluate to null or nan, then it will compute a corresponding maximum distance value based on the properties of the feature.", + "example" : "(print (generalized_distance 0.01 (null) (null) (list null (list 0 360)) (list 0.5 0.0) (list 0 2 3) (list 1 2 3)))\n(print (generalized_distance 0.01 (list 0.25 0.25 0.5) (null) (null) (null) (list 1 2 3) (list 0 2 3) ))\n(generalized_distance 1 (list 0.3333 0.3333 0.3333) (list 5 0) (null) (null) (list 1 2 3) (list 10 2 10) )" + }, + + { + "parameter" : "entropy list|assoc|number p [list|assoc|number q] [number p_exponent] [number q_exponent]", + "output" : "number", + "description" : "Computes a form of entropy on the specified vectors using nats (natural log, not bits) in the form of -sum p_i ln (p_i^pexponent * q_i^q_exponent). For both p and q, if p or q is a list of numbers, then it will treat each entry as being the probability of that element. If it is an associative array, then elements with matching keys will be matched. If p or q a number then it will use that value in place of each element. If p or q is null or not specified, it will be calculated as the reciprocol of the size of the other element (p_i would be 1/|q| or q_i would be 1/|p|). If either p_exponent or q_exponent is 0, then that exponent will be ignored. 
Shannon entropy can be computed by ignoring the q parameters, setting p_exponent to 1 and q_exponent to 0. KL-divergence can be computed by providing both p and q and setting p_exponent to -1 and q_exponent to 1. Cross-entorpy can be computed by setting p_exponent to 0 and q_exponent to 1.", + "example" : "(entropy (list 0.5 0.5))\n(entropy (list 0.5 0.5) (list 0.25 0.75) 1 -1)\n(entropy 0.5 (list 0.25 0.75) 1 -1)\n(entropy 0.5 (list 0.25 0.75) 0 1)" + }, + + { + "parameter" : "first [list|assoc|number|string data]", + "output" : "*", + "description" : "Evaluates to the first element. If data is a list, it will be the first element. If data is an assoc, it will evaluate to the first element by assoc storage, but order does not matter. If data is a string, it will be the first character. If data is a number, it will evaluate to 1 if nonzero, 0 if zero.", + "example" : "(print (first (list 4 9.2 \"this\")))\n(print (first (assoc a 1 b 2)))\n(print (first 3))\n(print (first 0))\n(print (first \"abc\"))\n(print (first \"\"))" + }, + + { + "parameter" : "tail [list|assoc|number|string data] [number retain_count]", + "output" : "list", + "description" : "Evaluates to everything but the first element. If data is a list, it will be a list of all but the first element. If data is an assoc, it will evaluate to the assoc without the first element by assoc storage order, but order does not matter. If data is a string, it will be all but the first character. If data is a number, it will evaluate to the value minus 1 if nonzero, 0 if zero. If a retain_count is specified, it will be the number of elements to retain. A positive number means from the end, a negative number means from the beginning. The default value is -1 (all but the first).", + "example" : "(print (tail (list 4 9.2 \"this\")))\n(print (tail (assoc a 1 b 2)))\n(print (tail 3))\n(print (tail 0))\n(print (tail \"abc\"))\n(print (tail \"\"))\n(print (tail (list 1 2 3 4 5 6) 2))" + }, + + { + "parameter" : "last [list|assoc|number|string data]", + "output" : "*", + "description" : "Evaluates to the last element. If it is a list, it will be the last element. If assoc, it will evaluate to the first element by assoc storage, because order does not matter. If it is a string, it will be the last character. If it is a number, it will evaluate to 1 if nonzero, 0 if zero.", + "example" : "(print (last (list 4 9.2 \"this\")))\n(print (last (assoc a 1 b 2)))\n(print (last 3))\n(print (last 0))\n(print (last \"abc\"))\n(print (last \"\"))" + }, + + { + "parameter" : "trunc [list|assoc|number|string data] [number retain_count]", + "output" : "list", + "description" : "Truncates, evaluates to everything but the last element. If data is a list, it will be a list of all but the last element. If data is an assoc, it will evaluate to the assoc without the first element by assoc storage order, because order does not matter. If data is a string, it will be all but the last character. If data is a number, it will evaluate to the value minus 1 if nonzero, 0 if zero. If truncate_count is specified, it will be the number of elements to retain. A positive number means from the beginning, a negative number means from the end. 
The default value is -1 (all but the last).", + "example" : "(print (trunc (list 4 9.2 \"this\")))\n(print (trunc (assoc \"a\" 1 \"b\" 2)))\n(print (trunc 3))\n(print (trunc 0))\n(print (trunc \"abc\"))\n(print (trunc \"\"))\n(print (trunc (list 1 2 3 4 5 6) -2))" + }, + + { + "parameter" : "append [list|assoc|* collection1] [list|assoc|* collection2] ... [list|assoc|* collectionN]", + "output" : "list|assoc", + "new value" : "new", + "description" : "Evaluates to a new list or assoc which merges all lists (collection1 through collectionN) based on parameter order. If any assoc is passed in, then returns an assoc (lists will be automatically converted to an assoc with the indices as keys and the list elements as values). If a non-list and non-assoc is specified, then it just adds that one element to the list", + "example" : "(print (append (list 1 2 3) (list 4 5 6) (list 7 8 9)))\n(print (append (list 1 2 3) (assoc \"a\" 4 \"b\" 5 \"c\" 6) (list 7 8 9) (assoc d 10 e 11)))\n(print (append (list 4 9.2 \"this\") \"end\"))\n(print (append (assoc 0 4 1 9.2 2 \"this\") \"end\"))" + }, + + { + "parameter" : "size [list|assoc|string collection] collection", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to the size of the collection in number of elements. If collection is a string, returns the length in UTF-8 characters.", + "example" : "(print (size (list 4 9.2 \"this\")))\n(print (size (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\")))" + }, + + { + "parameter" : "range [* function] number low_endpoint number high_endpoint [number step_size]", + "output" : "list", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "Evaluates to a list with the range from low_endpoint to high_endpoint. The default step_size is 1. Evaluates to an empty list if the range is not valid. If four arguments are specified, then the function will be evaluated for each value in the range.", + "example" : "(print (range 0 10))\n(print (range 10 0))\n(print (range 0 5 0.0))" + }, + + { + "parameter" : "rewrite * function * target", + "output" : "*", + "new value" : "new", + "new target scope": true, + "description" : "Rewrites target by applying the function in a bottom-up manner. For each node in the target tree, pushes a new target scope onto the target stack, with target_value being the current node and target_index being to the index to the current node relative to the node passed into rewrite accessed via target, and evaluates function. 
Returns the resulting tree, after have been rewritten by function.", + "example" : "(print (rewrite\n (lambda (if (~ (target_value) 0) (+ (target_value) 1) (target_value)) )\n (list (assoc \"a\" 13)) ) )\n ;rewrite all integer additions into multiplies and then fold constants\n(print (rewrite\n (lambda\n ;find any nodes with a + and where its list is filled to its size with integers\n (if (and \n (= (get_type (target_value)) \"+\")\n (= (size (target_value)) (size (filter (lambda (~ (target_value) 0)) (target_value))) )\n )\n (reduce (lambda (* (target_value 1) (target_value)) ) (target_value))\n (target_value))\n )\n ;original code with additions to be rewritten\n (lambda\n (list (assoc \"a\" (+ 3 (+ 13 4 2)) )) )\n) )\n(print (rewrite\n (lambda\n (if (and \n (= (get_type (target_value)) \"+\")\n (= (size (target_value)) (size (filter (lambda (~ (target_value) 0)) (target_value))) )\n )\n (reduce (lambda (+ (target_value 1) (target_value)) ) (target_value))\n (target_value))\n )\n (lambda\n (+ (+ 13 4) (target_value 1)) )\n) )" + }, + + { + "parameter" : "map * function [list|assoc collection1] [list|assoc collection2] ... [list|assoc collectionN]", + "output" : "list", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "For each element in the collection, pushes a new target scope onto the stack, so that target_value accesses the element or elements in the list and target_index accesses the list or assoc index, with target representing the outer set of lists or assocs, and evaluates the function. Returns the list of results, mapping the list via the specified function. If multiple lists or assocs are specified, then it pulls from each list or assoc simultaneously (null if overrun or index does not exist) and (target_value) contains an array of the values in parameter order. Note that concurrency is only available when one collection is specified.", + "example" : "(print (map (lambda (* (target_value) 2)) (list 1 2 3 4)))\n(print (map (lambda (+ (target_value) (target_index))) (assoc 10 1 20 2 30 3 40 4)))\n(print (map\n (lambda\n (+ (get (target_value) 0) (get (target_value) 1) (get (target_value) 2))\n )\n (assoc \"0\" 0 \"1\" 1 \"a\" 3)\n (assoc \"a\" 1 \"b\" 4)\n (list 2 2 2 2)\n))" + }, + + { + "parameter" : "filter [* function] list|assoc collection", + "output" : "list|assoc", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "For each element in the collection, pushes a new target scope onto the stack, so that target_value accesses the element in the list and target_index accesses the list or assoc index, with target representing the original list or assoc, and evaluates the function. If function evaluates to true, then the element is put in a new list or assoc (matching the input type) that is returned. If function is omitted, then it will remove any elements in the collection that are null, .nan, or .nas string.", + "example" : "(print (filter (lambda (> (target_value) 2)) (list 1 2 3 4)))" + }, + + { + "parameter" : "weave [* function] list|immediate values1 [list|immediate values2] [list|immediate values3]...", + "output" : "list", + "new target scope": true, + "description" : "Interleaves the values lists optionally by applying a function. If only values1 is passed in, then it evaluates to values1. 
If values1 and values2 are passed in, or, if more values are passed in but function is null, it interleaves the two lists out to whichever list is longer, filling in the remainder with null, and if any value is an immediate, then it will repeat the immediate value. If the function is specified and not nulll, it pushes a new target scope onto the stack, so that target_value accesses a list of elements to be woven together from the list, and target_index accesses the list or assoc index, with target representing the original list or assoc. The function should evaluate to a list, and weave will evaluate to a concatenated list of all of the lists that the function evaluated to.", + "example" : "(print (weave (list 1 3 5) (list 2 4 6)) \"\\n\")\n(print (weave (lambda (list (apply \"min\" (target_value) ) ) (list 1 3 4 5 5 6) (list 2 2 3 4 6 7) )\"\\n\")\n(print (weave (lambda (if (<= (get (target_value) 0) 4) (list (apply \"min\" (target_value 1)) ) (target_value)) ) (list 1 3 4 5 5 6) (list 2 2 3 4 6 7) )\"\\n\")\n(print (weave (null) (list 2 4 6) (null) ) \"\\n\")" + }, + + { + "parameter" : "reduce * function list|assoc collection", + "output" : "*", + "new value" : "new", + "new target scope": true, + "description" : "For each element in the collection after the first one, it pushes a pair of new target scope onto the stack, so that target_value accesses a list of elements from the list, and target_index accesses the list or assoc index if it is not already reduced, with target representing the original list or assoc, and evaluates function. If the collection is empty, null is returned. if the collection is of size one, the single element is returned.", + "example" : "(print (reduce (lambda (* (target_value 1) (target_value))) (list 1 2 3 4)))" + }, + + { + "parameter" : "apply * to_apply [list|assoc collection]", + "output" : "*", + "new value" : "new", + "description" : "Creates a new list of the values of the elements of the collection, applies the type specified by to_apply, which is either the type corresponding to a string or the type of to_apply, and then evaluates it. If to_apply has any parameters, these are prepended to the collection as the first parameters. When no extra parameters are passed, it is roughly equivalent to (call (set_type list \"+\")).", + "example" : "(print (apply (lambda (+)) (list 1 2 3 4)))\n(print (apply (lambda (+ 5)) (list 1 2 3 4)) \"\n\")\n(print (apply \"+\" (list 1 2 3 4)))" + }, + + { + "parameter" : "reverse list l", + "output" : "list", + "new value" : "new", + "description" : "Returns a new list containing the list with its elements in reversed order.", + "example" : "(print (reverse (list 1 2 3 4 5)))" + }, + + { + "parameter" : "sort [* function] list l", + "output" : "list", + "new value" : "new", + "new target scope": true, + "description" : "Returns a new list containing the list with its elements sorted in increasing order. Numerical values come before strings, and code will be evaluated as the representative strings. If function is specified, it pushes a pair of new target scope onto the stack, so that target_value accesses a list of elements to from the list, and target_index accesses the list or assoc index if it is not already reduced, with target representing the original list or assoc, and evaluates function. 
The function should return a number, positive if \"(target_value)\" is greater, negative if \"(target_value 1)\" is greater, 0 if equal.", + "example" : "(print (sort (list 4 9 3 5 1)))\n(print (sort (list \"n\" \"b\" \"hello\" 4 1 3.2 (list 1 2 3))))\n(print (sort (list 1 \"1x\" \"10\" 20 \"z2\" \"z10\" \"z100\")))\n(print (sort (lambda (- (target_value) (target_value 1))) (list 4 9 3 5 1)))" + }, + + { + "parameter" : "indices list|assoc a", + "output" : "list of string|number", + "new value" : "new", + "description" : "Evaluates to the list of strings or numbers that comprise the indices or indexes for the list or associative list. It is guaranteed that the opcodes indices and values (assuming the parameter only_unique_values is not true) will evaluate and return elements in the same order when given the same node.", + "example" : "(print (indices (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\")))\n(print (indices (list \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\")))" + }, + + { + "parameter" : "values list|assoc a [bool only_unique_values]", + "output" : "list of *", + "description" : "Evaluates to the list of entities that comprise the values for the list or associative list. For a list, it evaluates to itself. If only_unique_values is true (defaults to false), then it will filter out any duplicate values and only return those that are unique (preserving order of first appearance). If only_unique_values is not true, then it is guaranteed that the opcodes indices and values will evaluate and return elements in the same order when given the same node.", + "example" : "(print (values (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\")))\n(print (values (list \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\")))" + }, + + { + "parameter" : "contains_index list|assoc a string|number|list index", + "output" : "bool", + "new value" : "new", + "description" : "Evaluates to true if the index is in the list or associative list. If index is a string, it will attempt to look at a as an assoc, if number, it will look at a as a list. If index is a list, it will traverse a via the elements in the list.", + "example" : "(print (contains_index (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") \"c\"))\nprint (contains_index (list \"a\" 1 2 3 4 \"d\") 2))" + }, + + { + "parameter" : "contains_value list|assoc|string a string|number value", + "output" : "bool", + "new value" : "new", + "description" : "Evaluates to true if the value is a value in the list or associative list. If a is a string, then it uses value as a regular expression and evaluates to true if the regular expression matches.", + "example" : "(print (contains_value (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") 1))\n(print (contains_value (list \"a\" 1 2 3 4 \"d\") 2))" + }, + + { + "parameter" : "remove list|assoc a number|string|list index", + "output" : "list|assoc", + "new value" : "new", + "description" : "Removes the index-value pair with index being the index in assoc or index of the list or assoc, returning a new list or assoc with that index removed. If index is a list of numbers or strings, then it will remove each of the requested indices. 
Negative numbered indices will count back from the end of a list.", + "example" : "(print (remove (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") 4))\n(print (remove (list \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") 4))\n (print (remove (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") (list 4 \"a\") ))" + }, + + { + "parameter" : "keep list|assoc a number|string|list index", + "output" : "list|assoc", + "new value" : "new", + "description" : "Keeps only the index-value pair with index being the index in assoc or index of the list or assoc, returning a new list or assoc with that only that index. If index is a list of numbers or strings, then it will only keep each of the requested indices. Negative numbered indices will count back from the end of a list.", + "example" : "(print (keep (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") 4))\n(print (keep (list \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") 4))\n (print (keep (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") (list 4 \"a\") ))" + }, + + { + "parameter" : "associate [* index1] [* value1] [* index2] [* value2] ... [* indexN] [* valueN]", + "output" : "assoc", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "Evaluates to the assoc, where each pair of parameters (e.g., index1 and value1) comprises a index/value pair. Pushes a new target scope such that (target), (target_index), and (target_value) access the assoc, the current index, and the current value.", + "example" : "(print (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\"))" + }, + + { + "parameter" : "zip [* function] list indices [* values]", + "output" : "assoc", + "new value" : "new", + "new target scope": true, + "description" : "Evaluates to a new assoc where the indices are the keys and the values are the values, with corresponding positions in the list matched. If the values is omitted, then it will use nulls for each of the values. If values is not a list, then all of the values in the assoc returned are set to the same value. When one parameter is specified, it is the list of indices. When two parameters are specified, it is the indices and values. When three values are specified, it is the function, indices and values. Values defaults to (null) and function defaults to (lambda (target_value)). 
When there is a collision of indices, the function is called, it pushes a pair of new target scope onto the stack, so that target_value accesses a list of elements from the list, target_index accesses the list or assoc index if it is not already reduced, with target representing the original list or assoc, evaluates function if one exists, and (target_value) is the new value attempted to be inserted over (target_value 1).", + "example" : "(print (zip (list \"a\" \"b\" \"c\" \"d\") (list 1 2 3 4)))" + }, + + { + "parameter" : "unzip [list|assoc values] list indices", + "output" : "list", + "new value" : "new", + "description" : "Evaluates to a new list, using the indices list to look up each value from the values list or assoc, in the same order as each index is specified in indices.", + "example" : "(print (unzip (assoc \"a\" 1 \"b\" 2 \"c\" 3) (list \"a\" \"b\")))\n(print (unzip (list 1 2 3) (list 0 -1 1)))" + }, + + { + "parameter" : "get * data [number index|string index|list walk_path_1] [number index|string index|list walk_path_2] ...", + "output" : "*", + "description" : "Evaluates to data as traversed by the set of values specified by the second parameter, which can be any of: a number, representing an index, with negative numbers representing backward traversal from the end of the list; a string, representing the index; or a list, representing a way to walk into the structure as the aforementioned values. If multiple walk paths are specified, then get returns a list, where each element in the list is the respective element retrieved by the respective walk path. If the walk path continues past the data structure, it will return a (null).", + "example" : "(print (get (list 1 2 3)))\n(print (get (list 4 9.2 \"this\") 1))\n(print (get (assoc \"a\" 1 \"b\" 2 \"c\" 3 4 \"d\") \"c\"))\n(print (get (list 0 1 2 3 (list 0 1 2 (assoc \"a\" 1))) (list 4 3 \"a\")))\n (print (get (list 4 9.2 \"this\") 1 2) \"\\n\")" + }, + + { + "parameter" : "set * data [number index1|string index1|list walk_path1] [* new_value1] [number index2|string index2|list walk_path2] [* new_value2] ...", + "output" : "*", + "new value" : "new", + "description" : "Performs a deep copy on data (a copy of all data structures referenced by it and its references), then looks at the remaining parameters as pairs. For each pair, the first is any of: a number, representing an index, with negative numbers representing backward traversal from the end of the list; a string, representing the index; or a list, representing a way to walk into the structure as the aforementioned values. new_value1 to new_valueN represent a value that will be used to replace whatever is in the location the preceeding location parameter specifies. If a particular location does not exist, it will be created assuming the most generic type that will support the index (as a null, list, or assoc); however, it will not change the type of immediate values to an assoc or list. 
Note that the target operation will evaluate to the new copy of data, which is the base of the newly constructed data; this is useful for creating circular references.", + "example" : "(print (set (list 1 2 3 4) 2 7))\n(print (set\n (list (assoc \"a\" 1))\n (list 2) 1\n (list 1) (get (target) 0)))" + }, + + { + "parameter" : "replace * data [number index1|string index1|list walk_path1] [* function1] [number index2|string index2|list walk_path2] [* function2] ...", + "output" : "*", + "new value" : "new", + "new target scope": true, + "description" : "Performs a deep copy on data (a copy of all data structures referenced by it and its references), then looks at the remaining parameters as pairs. For each pair, the first is any of: a number, representing an index, with negative numbers representing backward traversal from the end of the list; a string, representing the index; or a list, representing a way to walk into the structure as the aforementioned values. function1 to functionN represent a function that will be used to replace in place of whatever is in the location, and will be passed the current node in (target_value). The function does not need to be a function and can just be a constant (which it will be evaluated as). If a particular location does not exist, it will be created assuming the most generic type that will support the index (as a null, list, or assoc). Note that the target operation will evaluate to the new copy of data, which is the base of the newly constructed data; this is useful for creating circular references.", + "example" : "(print (replace (list (assoc \"a\" 13)) ))\n(print (replace\n (list (assoc \"a\" 1))\n (list 2) 1\n (list 0) (list 4 5 6)))\n\n(print (replace\n (list (assoc \"a\" 1))\n (list 0) (lambda (set (target_value) \"b\" 2))\n ))" + }, + + { + "parameter" : "target [number stack_distance]", + "output" : "*", + "description" : "Evaluates to the current node that is being iterated over, or the base code of a set or replace that is being created. If a number is specified, it climbs back up the target stack that many levels. Useful for seralizing graph data structures or looking up data during iteration.", + "example" : ";prints the list of what has been created before its return value is included in the list\n(list 1 2 3 (print (target)) 4)\n (let (assoc moveref (list 0 (list 7 8) (get (target 0) 1) ) )\n (assign (assoc moveref (set moveref 1 1)))\n (print moveref)\n)" + }, + + { + "parameter" : "target_index [number stack_distance]", + "output" : "*", + "new value" : "new", + "description" : "Like target, but evaluates to the index of the current node being iterated on within target.", + "example" : "(list 1 2 3 (print (target_index)) 4)" + }, + + { + "parameter" : "target_value [number stack_distance]", + "output" : "*", + "description" : "Like target, but evaluates to the current node being iterated on within target.", + "example" : "(list 1 2 3 (print (target_value)) 4)" + }, + + { + "parameter" : "stack", + "output" : "*", + "description" : "Evaluates to the current execution context, also known as the scope stack.", + "example" : "(print (stack))" + }, + + { + "parameter" : "args [number stack_distance]", + "output" : "assoc", + "description" : "Evaluates to the top context of the stack, the current execution context, or scope stack, known as the arguments. 
If number is specified, then it evaluates to the context that many layers up the stack.", + "example" : "(let (assoc \"bbb\" 3)\n (print (args))\n)" + }, + + { + "parameter" : "and [bool condition1] [bool condition2] ... [bool conditionN]", + "output" : "*", + "new value" : "c", + "concurrency" : true, + "description" : "If all condition expressions are true, evaluates to conditionN. Otherwise evaluates to false.", + "example" : "(print (and 1 4.8 \"true\"))\n(print (and 1 0.0 \"true\"))" + }, + + { + "parameter" : "or [bool condition1] [bool condition2] ... [bool conditionN]", + "output" : "*", + "new value" : "c", + "concurrency" : true, + "description" : "If all condition expressions are false, evaluates to false. Otherwise evaluates to the first condition that is true.", + "example" : "(print (or 1 4.8 \"true\"))\n(print (or 1 0.0 \"true\"))\n(print (or 0 0.0 \"\"))" + }, + + { + "parameter" : "xor [bool condition1] [bool condition2] ... [bool conditionN]", + "output" : "*", + "new value" : "new", + "concurrency" : true, + "description" : "If an even number of condition expressions are true, evaluates to false. Otherwise evaluates to true.", + "example" : "(print (xor 1 4.8 \"true\"))\n(print (xor 1 0.0 \"true\"))" + }, + + { + "parameter" : "not bool condition", + "output" : "bool", + "new value" : "new", + "description" : "Evaluates to false if condition is true, true if false.", + "example" : "(print (not 1))\n(print (not \"\"))" + }, + + { + "parameter" : "= [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are equal (will recurse into data structures), false otherwise. Values of nan (not a number) are considered equal because they represent the same node, unlike many other floating point representation systems.", + "example" : "(print (= 4 4 5))\n(print (= 4 4 4))" + }, + + { + "parameter" : "!= [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if no two values are equal (will recurse into data structures), false otherwise.", + "example" : "(print (!= 4 4))\n(print (!= 4 5))\n(print (!= 4 4 5))\n(print (!= 4 4 4))\n(print (!= 4 4 \"hello\" 4))\n(print (!= 4 4 4 1 3.0 \"hello\"))\n(print (!= 1 2 3 4 5 6 \"hello\"))\n" + }, + + { + "parameter" : "< [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are in strict increasing order, false otherwise.", + "example" : "(print (< 4 5))\n(print (< 4 4))\n(print (< 4 5 6))\n(print (< 4 5 6 5))\n" + }, + + { + "parameter" : "<= [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are in nondecreasing order, false otherwise.", + "example" : "(print (<= 4 5))\n(print (<= 4 4))\n(print (<= 4 5 6))\n(print (<= 4 5 6 5))" + }, + + { + "parameter" : "> [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are in strict decreasing order, false otherwise.", + "example" : "(print (> 6 5))\n(print (> 4 4))\n(print (> 6 5 4))\n(print (> 6 5 4 5))" + }, + + { + "parameter" : ">= [* node1] [* node2] ... 
[* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are in nonincreasing order, false otherwise.", + "example" : "(print (>= 6 5))\n(print (>= 4 4))\n(print (>= 6 5 4))\n(print (>= 6 5 4 5))" + }, + + { + "parameter" : "~ [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "concurrency" : true, + "description" : "Evaluates to true if all values are of the same data type, false otherwise.", + "example" : "(print (~ 1 4 5))\n(print (~ 1 4 \"a\"))" + }, + + { + "parameter" : "!~ [* node1] [* node2] ... [* nodeN]", + "output" : "bool", + "new value" : "new", + "description" : "Evaluates to true if no two values are of the same data types, false otherwise.", + "example" : "(print (!~ \"true\" \"false\" (list 3 2)))\n(print (!~ \"true\" 1 (list 3 2)))" + }, + + { + "parameter" : "rand [list|number range] [number number_to_generate] [bool unique]", + "output" : "*", + "new value" : "new, unless range is a list", + "description" : "With no parameters, evaluates to a random number between 0.0 and 1.0. Each entity has its own random stream, and if called from a sandbox, then it uses a new stream without interrupting the stream of the calling entity. If the parameter is a list, it will uniformly randomly choose and evaluate to one element of the list. If number, it will evaluate to a value greater than or equal to zero and less than the number specified. If number_to_generate is specified, it will generate a list of multiple values (even if number_to_generate is 1). If unique is true (it defaults to false), then it will only return unique values, the same as selecting from the list or assoc without replacement.", + "example" : "(print (rand))\n(print (rand 50))\n(print (rand (list 1 2 4 5 7)))\n(print (rand (range 0 10) 10 (true)) \"\\n\")" + }, + + { + "parameter" : "weighted_rand [list of lists|assoc weighted_values] [number number_to_generate] [bool unique]", + "output" : "*", + "description" : "Each entity has its own random stream, and if called from a sandbox, then it uses a new stream without interrupting the stream of the calling entity. If the parameter is a list, it will uniformly randomly choose and evaluate to one element of the list. If an assoc, then it will randomly evaluate to one of the keys using the values as the weights for the probabilities. Nans and negative numbers are treated as zero. Infinities are normalized as to only select from infinities in the list. If all values are 0, then they are normalized to having the same weight. If a list of lists, it will use the first list as a list of values and the second list as a list of weights and otherwise work like it would for an assoc. If number_to_generate is specified, it will generate a list of multiple values (even if number_to_generate is 1). 
If unique is true (it defaults to false), then it will only return unique values, the same as selecting from the list or assoc without replacement.", + "example" : "(print (rand (list (list 1 2 4 5 7) (list 0.2 0.2 0.1 0.1 0.4))))\n(print (rand (assoc \"a\" 1 \"b\" 3))\n(print (rand (assoc \"a\" .25 \"b\" .75)) \"\\n\")\n(print (rand (assoc \"a\" .25 \"b\" .75) 4) \"\\n\")\n(print (rand (range 0 10) 10 (true)) \"\\n\")" + }, + + { + "parameter" : "get_rand_seed", + "output" : "string", + "permissions" : "", + "new value" : "new", + "description" : "Evaluates to a string representing the current state of the random number generator used for the rand command for the entity specified by id.", + "example" : "(print (get_rand_seed) \"\\n\")" + }, + + { + "parameter" : "set_rand_seed * node", + "output" : "string", + "permissions" : "", + "description" : "Sets the random number seed and state for the current random number stream without affecting any entity. If node is already a string in the proper format output by get_entity_rand_seed, then it will set the random generator to that current state, picking up where the previous state left off. If it is anything else, it uses the value as a random seed to start the genrator.", + "example" : " (declare (assoc cur_seed (get_rand_seed)))\n (print (rand) \"\\n\")\n (set_rand_seed cur_seed)\n (print (rand) \"\\n\")" + }, + + { + "parameter" : "system_time", + "output" : "number", + "permissions" : "r", + "description" : "Evaluates to the current system time since epoch in seconds (including fractions of seconds).", + "example" : "(print (system_time))" + }, + + { + "parameter" : "true", + "output" : "immediate 1", + "new value" : "new", + "description" : "Evaluates to the immediate value true.", + "example" : "(print (true))" + }, + + { + "parameter" : "false", + "output" : "immediate 0", + "new value" : "new", + "description" : "Evaluates to the immediate value false.", + "example" : "(print (false))" + }, + + { + "parameter" : "null", + "output" : "immediate null", + "description" : "Evaluates to the immediate null value.", + "example" : "(print (null))\n(print (lambda (null (+ 3 5) 7)) )\n(print (lambda (#nulltest null)))" + }, + + { + "parameter" : "list [* node1] [* node2] ... [* nodeN]", + "output" : "list of *", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "Evaluates to the list specified by the parameters. Pushes a new target scope such that (target), (target_index), and (target_value) access the list, the current index, and the current value.", + "example" : "(print (list \"a\" 1 \"b\"))" + }, + + { + "parameter" : "assoc [bstring index1] [* value1] [bstring index1] [* value2] ...", + "output" : "assoc", + "new value" : "new", + "concurrency" : true, + "new target scope": true, + "description" : "Evaluates to the associative list, where each pair of parameters (e.g., index1 and value1) comprises a index/value pair. Pushes a new target scope such that (target), (target_index), and (target_value) access the assoc, the current index, and the current value. 
If any of the bstrings do not have reserved characters or spaces, then quotes are optional; if spaces or reserved characters are present, then quotes are required.", + "example" : "(print (assoc b 2 c 3))\n(print (assoc a 1 \"b\\ttab\" 2 c 3 4 \"d\"))" + }, + + { + "parameter" : "[number]", + "output" : "number", + "description" : "A 64-bit floating point value", + "example" : "4\n2.22228" + }, + + { + "parameter" : "[string]", + "output" : "number", + "description" : "A string.", + "example" : "\"hello\"" + }, + + { + "parameter" : "[symbol]", + "output" : "string", + "description" : "A string representing an internal symbol (a variable).", + "example" : "my_variable" + }, + + { + "parameter" : "get_type * node", + "output" : "*", + "new value" : "new", + "description" : "Returns a node of the type corresponding to the node.", + "example" : "(print (get_type (lambda (+ 3 4))))" + }, + + { + "parameter" : "get_type_string * node", + "output" : "string", + "new value" : "new", + "description" : "Returns a string that represents the type corresponding to the node.", + "example" : "(print (get_type_string (lambda (+ 3 4))))" + }, + + { + "parameter" : "set_type * node1 [string|* type]", + "output" : "*", + "new value" : "c", + "description" : "Creates a copy of node1, setting the type of the node of to whatever node type is specified by string or to the same type as the top node of type. It will convert the parameters to or from assoc if necessary.", + "example" : "(print (set_type (lambda (+ 3 4)) \"-\"))\n(print (set_type (assoc \"a\" 4 \"b\" 3) \"list\"))\n(print (set_type (assoc \"a\" 4 \"b\" 3) (list)))\n(print (set_type (list \"a\" 4 \"b\" 3) \"assoc\"))\n(print (call (set_type (list 1 0.5 \"3.2\" 4) \"+\")))" + }, + + { + "parameter" : "format * data string from_format string to_format [assoc from_params] [assoc to_params]", + "output" : "*", + "new value" : "new", + "description" : "Converts data from from_format into to_format. Supported language types are \"number\", \"string\", and \"code\", where code represents everything beyond number and string. Beyond the supported language types, additional formats that are stored in a binary string. The additional formats are \"Base16\", \"Base64\", \"int8\", \"uint8\", \"int16\", \"uint16\", \"int32\", \"uint32\", \"int64\", \"uint64\", \"float\", \"double\", \"INT8\", \"UINT8\", \"INT16\", \"UINT16\", \"INT32\", \"UINT32\", \"INT64\", \"UINT64\", \"FLOAT\", \"DOUBLE\", \"json\", \"yaml\", and \"date\" (though date is a special case). Lower case binary types names represent little endian and upper case binary type names represent big endian, and binary types will be handled as strings. The \"date\" type requires additional information. Following \"date\" is a colon, followed by a standard strftime date format string. If from_params or to_params are specified, then it will apply the appropriate from or to as appropriate. If the format is either \"string\", \"json\", or \"yaml\", then the key \"sort_keys\" can be used to specify a boolean value, if true, then it will sort the keys, otherwise the default behavior is to emit the keys based on memory layout. If the format is date, then the to or from params can be an assoc with \"locale\" and \"timezone\" as optional keys, with the values representing the locale and timezone. The locale is provided, then it will leverage operating system support to apply appropriate formatting, such as en_US. Note that UTF-8 is assumed and automatically added to the locale. 
If no locale is specified, then the default will be used. If converting to or from dates and a timezone is specified, it will use the standard timezone name; if unspecified or an empty string, it will assume the current time zone.", + "example" : "(print (format 65 \"number\" \"int8\") \"\\n\")\n(print (format (format -100 \"number\" \"double\") \"double\" \"number\") \"\\n\")" + }, + + { + "parameter" : "get_labels * node", + "output" : "list of string", + "new value" : "new", + "description" : "Returns a list of strings comprising all of the labels for the particular node of *.", + "example" : "(print (get_labels ( #labelA lambda #labelB (true))))" + }, + + { + "parameter" : "get_all_labels * node", + "output" : "assoc", + "description" : "Returns an associative list of the labels for the node of code and everything underneath it, where the index is the label and the value is the reference to *.", + "example" : "(print (get_all_labels (lambda (#label21 print \"hello world: \" (* #label-number-22 3 4) #label23 \" and \" (* 1 2) )) ))\n(print (get_all_labels (lambda\n ( #labelA #labelQ * #labelB\n (+ 1 #labelA 3) 2))))" + }, + + { + "parameter" : "set_labels * node (list [string new_label1]...[string new_labelN])", + "output" : "*", + "new value" : "new", + "description" : "Sets the labels for the node of code. Evaluates to the node represented by the input node.", + "example" : "(print (set_labels\n ( lambda\n (#labelC true)) (list \"labelD\" \"labelE\")))" + }, + + { + "parameter" : "zip_labels list labels * to_add_labels", + "output" : "*", + "new value" : "new", + "description" : "For each of the values in to_add_labels, it takes the respective value from labels and applies that string as a label to the respective value, and returns a new set of values with the labels.", + "example" : "(print (zip_labels (list \"l1\" \"l2\" \"l3\") (list 1 2 3)))" + }, + + { + "parameter" : "get_comments * node", + "output" : "string", + "new value" : "new", + "description" : "Returns a string comprising all of the comments for the input node.", + "example" : "(print (get_comments\n ;this is a comment\n (lambda ;comment too\n (true))))" + }, + + { + "parameter" : "set_comments * node [string new_comment]", + "output" : "*", + "new value" : "", + "description" : "Sets the comments for the node of code. Evaluates to the node represented by new_comment.", + "example" : "(print (set_comments\n ;this is a comment\n (lambda ;comment too\n (true)) \"new comment\"))" + }, + + { + "parameter" : "get_concurrency * node", + "output" : "bool", + "new value" : "new", + "description" : "Returns true if the node has a preference to be processed in a manner where its operations are run concurrently (and potentially subject to race conditions). False if it is not.", + "example" : "(print (get_concurrency (lambda ||(map foo array))) \"\\n\")" + }, + + { + "parameter" : "set_concurrency * node bool concurrent", + "output" : "*", + "new value" : "new", + "description" : "Sets whether the node has a preference to be processed in a manner where its operations are run concurrently (and potentially subject to race conditions). Evaluates to the node represented by the input node.", + "example" : "(print (set_concurrency (lambda (map foo array)) (true)) \"\\n\")" + }, + + { + "parameter" : "get_value * node", + "output" : "*", + "new value" : "new", + "description" : "Returns just the value portion of node (no labels or comments). 
Will evaluate to a copy of the value if it is not a unique reference, making it useful to ensure that the copy of the data is unique.", + "example" : "(print (get_value\n ;this is a comment\n (lambda ;comment too\n #withalabel (true))))" + }, + + { + "parameter" : "set_value * target * val", + "output" : "*", + "new value" : "new", + "description" : "Sets target's value to the value of val, keeping existing labels and comments.", + "example" : "(print (set_value\n ;this is a comment\n (lambda ;comment too\n (true)) 3))" + }, + + { + "parameter" : "explode [string str] [number stride]", + "output" : "list of string", + "new value" : "new", + "description" : "Explodes string str into the pieces that make it up. If stride is zero or unspecified, then it explodes the string by character per UTF-8 parsing. If stride is specified, then it breaks it into chunks of that many bytes. For example, a stride of 1 would break it into bytes, whereas a stride of 4 would break it into 32-bit chunks.", + "example" : "(print (explode \"test\"))\n(print (explode \"test\" 2))" + }, + + { + "parameter" : "split [string str] [string split_string] [number max_split_count] [number stride]", + "output" : "list of string", + "new value" : "new", + "description" : "Splits the string str into a list of strings based on the split_string, which is handled as a regular expression. Any data matching split_string will not be included in any of the resulting strings. If max_split_count is provided and greater than zero, it will only split up to that many times. If stride is zero or unspecified, then it explodes the string by character per UTF-8 parsing. If stride is specified and a value other than zero, then it does not use split_string as a regular expression but rather a string, and it breaks the result into chunks of that many bytes. For example, a stride of 1 would break it into bytes, whereas a stride of 4 would break it into 32-bit chunks.", + "example" : "(print (split \"hello world\" \" \"))" + }, + + { + "parameter" : "substr [string str] [number|string location] [number|string param] [string replacement] [number stride]", + "output" : "string | list of string | list of list of string", + "new value" : "new", + "description" : "Finds a substring of string str. If location is a number, then evaluates to a new string representing the substring starting at offset, but if location is a string, then it will treat location as a regular expression. If param is specified, then if location is a number it will go until that length beyond the offset, and if location is a regular expression, param will represent one of the following: if null or \"first\", then it will return the first match of the regular expression; if param is a number or the string \"all\", then substr will evaluate to a list of up to param matches (which may be infinite yielding the same result as \"all\"). If param is a negative number or the string \"submatches\", then it will return a list of lists of strings, one for each match up to the count of the negative number or all matches if \"submatches\"; each inner list will represent the full regular expression match followed by each submatch as captured by parentheses in the regular expression, ordered in an outer-to-inner, left-to-right manner. If location is a number and offset or length are negative, then it will measure from the end of the string rather than the beginning. 
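For example, a location of -5 would start the substring 5 characters from the end of the string.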
If replacement is specified and not null, it will return the original string rather than the substring, but the substring will be replaced by replacement regardless of what location is; and if replacement is specified, then it will override some of the logic for the param type and always return just a string and not a list. If stride is zero or unspecified, then it explodes the string by character per UTF-8 parsing. If stride is specified, then it breaks it into chunks of that many bytes. For example, a stride of 1 would break it into bytes, whereas a stride of 4 would break it into 32-bit chunks.", + "example" : "(print (substr \"hello world\" 5))" + }, + + { + "parameter" : "concat [string str1] [string str2] ... [string strN]", + "output" : "string", + "new value" : "new", + "description" : "Concatenates all strings and evaluates to the single string that is the result.", + "example" : "(print (concat \"hello\" \" \" \"world\"))" + }, + + { + "parameter" : "crypto_sign string message string secret_key", + "output" : "string", + "new value" : "new", + "description" : "Signs the message given the secret key and returns the signature using the Ed25519 algorithm. Note that the message is not included in the signature. The system opcode using the command sign_key_pair can be used to create a public/secret key pair.", + "example" : "(print (crypto_sign \"hello world\" secret_key))" + }, + + { + "parameter" : "crypto_sign_verify string message string public_key string signature", + "output" : "bool", + "new value" : "new", + "description" : "Verifies that the message was signed with the signature via the public key using the Ed25519 algorithm and returns true if the signature is valid, false otherwise. Note that the message is not included in the signature. The system opcode using the command sign_key_pair can be used to create a public/secret key pair.", + "example" : "(print (crypto_sign_verify \"hello world\" public_key signature))" + }, + + { + "parameter" : "encrypt string plaintext_message string key1 [string nonce] [string key2]", + "output" : "string", + "new value" : "new", + "description" : "If key2 is not provided, then it uses the XSalsa20 algorithm to perform shared secret key encryption on the message, returning the encrypted value. If key2 is provided, then the Curve25519 algorithm will additionally be used, and key1 will represent the receiver's public key and key2 will represent the sender's secret key. The nonce is a string of bytes up to 24 bytes long that will be used to randomize the encryption, and will need to be provided to the decryption in order to work. Nonces are not technically required, but strongly recommended to prevent replay attacks. The system opcode using the command encrypt_key_pair can be used to create a public/secret key pair.", + "example" : "(print (encrypt \"hello world\" shared_secret_key nonce))\n(print (encrypt \"hello world\" sender_secret_key nonce receiver_public_key))" + }, + + { + "parameter" : "decrypt string cyphertext_message string key1 [string nonce] [string key2]", + "output" : "string", + "new value" : "new", + "description" : "If key2 is not provided, then it uses the XSalsa20 algorithm to perform shared secret key decryption on the message, returning the decrypted value. If key2 is provided, then the Curve25519 algorithm will additionally be used, and key1 will represent the sender's public key and key2 will represent the receiver's secret key. 
The nonce is a string of bytes up to 24 bytes long that will be used to randomize the encryption, and will need to be provided to the decryption in order to work. Nonces are not technically required, but strongly recommended to prevent replay attacks. The system opcode using the command encrypt_key_pair can be used to create a public/secret key pair.", + "example" : "(print (decrypt \"hello world\" shared_secret_key nonce))\n(print (decrypt \"hello world\" sender_public_key nonce receiver_secret_key))" + }, + + { + "parameter" : "print [* node1] [* node2] ... [* nodeN]", + "output" : "null", + "description" : "Prints each of the parameters in order in a manner interpretable as if they were code. Output is pretty-printed. A node which evaluates to a literal string or number will be printed as its value directly (not as code) and will not have a newline appended.", + "example" : "(print \"hello\")" + }, + + { + "parameter" : "total_size * node", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to the total count of all of the nodes referenced within the input node. Each label on a node counts for an additional node. The volume of data in an individual node (such as in a string) counts as an additional node for each 48 characters.", + "example" : "(print (total_size (list 1 2 3 (assoc \"a\" 3 \"b\" 4) (list 5 6))))" + }, + + { + "parameter" : "mutate * node [number mutation_rate] [assoc mutation_weights] [assoc operation_type]", + "output" : "*", + "new value" : "new", + "description" : "Evaluates to a mutated version of the input node. The value specified in mutation_rate, from 0.0 to 1.0 and defaulting to 0.00001, indicates the probability that any node will experience a mutation. The parameter mutation_weights is an assoc where the keys are the allowed opcode names and the values are the probabilities that each opcode would be chosen; if null or unspecified, it defaults to all opcodes each with their own default probability. The operation_type is an assoc where the keys are mutation operations and the values are the probabilities that the operations will be performed. The operations can consist of the strings change_type, delete, insert, swap_elements, deep_copy_elements, and change_label.", + "example" : "(print (mutate\n (lambda (list 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (assoc \"a\" 1 \"b\" 2)))\n0.4))\n" + }, + + { + "parameter" : "commonality * node1 * node2 [bool use_string_edit_distance]", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to the total count of all of the nodes referenced within node1 and node2 that are equivalent, using fractions to represent somewhat similar nodes. If use_string_edit_distance is true and node1 and node2 are both string literals, string edit distance will be used to calculate commonality.", + "example" : "(print (commonality\n (lambda (seq 2 (get_entity_comments) 1))\n (lambda (seq 2 1 4 (get_entity_comments)))\n))\n (print (commonality\n (list 1 2 3 (assoc \"a\" 3 \"b\" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6))\n (list 1 2 3 (assoc \"c\" 3 \"b\" 4) (lambda (if true 1 (parallel 1 (get_entity_comments)))) (list 5 6))\n))" + }, + + { + "parameter" : "edit_distance * node1 * node2 [bool use_string_edit_distance]", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to the number of nodes that are different between node1 and node2, using fractions to represent somewhat similar nodes. 
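For example, two otherwise identical lists that differ in a single element would have an edit distance of 1 (or a fraction of 1 if the differing elements are partially similar).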
If use_string_edit_distance is true and node1 and node2 are both string literals, string edit distance will be calculated.", + "example" : "(print (edit_distance\n (lambda (seq 2 (get_entity_comments) 1))\n (lambda (seq 2 1 4 (get_entity_comments)))\n))\n (print (edit_distance\n (list 1 2 3 (assoc \"a\" 3 \"b\" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6))\n (list 1 2 3 (assoc \"c\" 3 \"b\" 4) (lambda (if true 1 (parallel 1 (get_entity_comments)))) (list 5 6))\n))" + }, + + { + "parameter" : "intersect * node1 * node2", + "output" : "*", + "new value" : "new", + "description" : "Evaluates to whatever is common between node1 and node2 exclusive.", + "example" : "(print (intersect\n (list 1 (lambda (- 4 2)) (assoc \"a\" 3 \"b\" 4))\n (list 1 (lambda (- 4 2)) (assoc \"c\" 3 \"b\" 4))\n))\n\n(print (intersect\n (lambda (seq 2 (get_entity_comments) 1))\n (lambda (seq 2 1 4 (get_entity_comments)))\n))\n \n(print (intersect\n (lambda (parallel 2 (get_entity_comments) 1))\n (lambda (parallel 2 1 4 (get_entity_comments)))\n))\n\n(print (intersect\n (list 1 2 3 (assoc \"a\" 3 \"b\" 4) (lambda (if true 1 (parallel (get_entity_comments) #label-not-1 1))) (list 5 6))\n (list 1 2 3 (assoc \"c\" 3 \"b\" 4) (lambda (if true 1 (parallel #label-not-1 1 (get_entity_comments)))) (list 5 6))\n))\n \n(print (intersect\n (lambda (list 1 (assoc \"a\" 3 \"b\" 4)))\n (lambda (list 1 (assoc \"c\" 3 \"b\" 4)))\n))\n\n(print (intersect\n (lambda (replace 4 2 6 1 7))\n (lambda (replace 4 1 7 2 6))\n))" + }, + + { + "parameter" : "union * node1 * node2", + "output" : "*", + "new value" : "new", + "description" : "Evaluates to whatever is inclusive when merging node1 and node2.", + "example" : "(print (union\n (lambda (seq 2 (get_entity_comments) 1))\n (lambda (seq 2 1 4 (get_entity_comments)))\n))\n\n(print (union\n (list 1 (lambda (- 4 2)) (assoc \"a\" 3 \"b\" 4))\n (list 1 (lambda (- 4 2)) (assoc \"c\" 3 \"b\" 4))\n))\n \n(print (union\n (lambda (parallel 2 (get_entity_comments) 1))\n (lambda (parallel 2 1 4 (get_entity_comments)))\n))\n\n(print (union\n (list 1 2 3 (assoc \"a\" 3 \"b\" 4) (lambda (if true 1 (parallel (get_entity_comments) #label-not-1 1))) (list 5 6))\n (list 1 2 3 (assoc \"c\" 3 \"b\" 4) (lambda (if true 1 (parallel #label-not-1 1 (get_entity_comments)))) (list 5 6))\n))\n \n(print (union\n (lambda (list 1 (assoc \"a\" 3 \"b\" 4)))\n (lambda (list 1 (assoc \"c\" 3 \"b\" 4)))\n))\n\n" + }, + + { + "parameter" : "difference * node1 * node2", + "output" : "*", + "new value" : "new", + "description" : "Finds the difference between node1 and node2, and generates code that, if evaluated passing node1 as its parameter \"_\", would turn it into node2. 
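That is, (call (difference node1 node2) (assoc _ node1)) would evaluate to node2, as shown in the example below.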
Useful for finding the smallest set of what needs to be changed to apply it to new (and possibly slightly different) data or code.", + "example" : "(print (difference\n (lambda (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13))\n (lambda (list a 2 c 4 d 6 q 8 e 10 f 12 g 14))\n))\n(print (difference\n (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13)\n (assoc a 2 c 4 d 6 q 8 e 10 f 12 g 14)\n))\n(print (difference\n (lambda (list 1 2 4 7 10 12 13))\n (lambda (list 2 4 6 8 10 12 14))\n))\n(print (difference\n (lambda (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13))\n (lambda (assoc a 2 c 4 d 6 q 8 e 10 f 12 g 14))\n))\n\n(print (difference\n (lambda (assoc a 1 g (list 1 2)))\n (lambda (assoc a 2 g (list 1 4)))\n))\n\n(print (difference\n (lambda (assoc a 1 g (list 1 2)))\n (lambda (assoc a 2 g (list 1 4)))\n))\n\n(let (assoc\n x (lambda (list 6 (list 1 2)))\n y (lambda (list 7 (list 1 4)))\n )\n \n (print (difference x y) )\n (print (call (difference x y) (assoc _ x)) )\n)\n\n(let (assoc\n x (lambda (list 6 (list (list \"a\" \"b\") 1 2)))\n y (lambda (list 7 (list (list \"a\" \"x\") 1 4)))\n )\n (print (difference x y) )\n (print (call (difference x y) (assoc _ x)) )\n)\n" + }, + + { + "parameter" : "mix * node1 * node2 [number keep_chance_node1] [number keep_chance_node2] [number similar_mix_chance]", + "output" : "*", + "new value" : "new", + "description" : "Performs a union operation on node1 and node2, but randomly ignores nodes from one or the other if the node is not equal. If only keep_chance_node1 is specified, keep_chance_node2 defaults to 1-keep_chance_node1. keep_chance_node1 specifies the probability that a node from node1 will be kept, and keep_chance_node2 the probability that a node from node2 will be kept. keep_chance_node1 + keep_chance_node2 should be between 1 and 2, otherwise it will be normalized. similar_mix_chance is the additional probability that two nodes will mix if they have some commonality, which will include interpolating number values based on keep_chance_node1 and keep_chance_node2, and defaults to 0.0. If similar_mix_chance is negative, then 1 minus the value will be anded with the commonality probability, so -1 means that it will never mix and 0 means it will only mix when sufficiently common.", + "example" : "(print (mix\n (lambda (list 1 3 5 7 9 11 13))\n (lambda (list 2 4 6 8 10 12 14))\n0.5 0.5))\n\n(print (mix\n (lambda (list 1 2 (assoc \"a\" 3 \"b\" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6)) )\n (lambda (list 1 5 3 (assoc \"a\" 3 \"b\" 4) (lambda (if false 1 (parallel (get_entity_comments) (lambda (print (list 2 9))) ))) ) )\n0.8 0.8))\n\n" + }, + + { + "parameter" : "mix_labels * node1 * node2 [number keep_portion] [number keep_portion_node2]", + "output" : "*", + "new value" : "new", + "description" : "Starts with node1, and for all common labels between node1 and node2, mixes node2 into node1. If keep_portion is given, then that is the fraction of matching labels in node2 to use in node1. If both keep_portion and keep_portion_node2 are given, then those are the fractions of labels in node1 and node2 to be used. 
If the sum is greater than 1, it is normalized; if less than 1, then some labeled code is discarded from node1.", + "example" : "(print (mix_labels\n (lambda (list 1 #mixtest1 2 #mixtest2 (assoc \"a\" 3 \"b\" 4) (lambda (if #mixtest3 true 1 (parallel (get_entity_comments) #mixtest4 1))) (list 5 6)) )\n (lambda (list 1 #mixtest1 5 #mixtest2 3 (assoc \"a\" 3 \"b\" 4) (lambda (if #mixtest3 false 1 (parallel (get_entity_comments) #mixtest4 (lambda (print (list 2 9))) ))) ) )\n0.5))" + }, + + { + "parameter" : "total_entity_size id entity", + "output" : "number", + "new value" : "new", + "description" : "Evaluates to the total count of all of the nodes of the entity represented by the input id and all its contained entities.", + "example" : "(create_entities \"MergeEntity1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities (list \"MergeEntity1\" \"MergeEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4)) )\n(create_entities (list \"MergeEntity1\" \"MergeEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4)) )\n(create_entities (list \"MergeEntity1\") (lambda (assoc \"E\" 3 \"F\" 4)) )\n(create_entities (list \"MergeEntity1\") (lambda (assoc \"e\" 3 \"f\" 4 \"g\" 5 \"h\" 6)) )\n\n(create_entities \"MergeEntity2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(create_entities (list \"MergeEntity2\" \"MergeEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4 \"z\" 5)) )\n(create_entities (list \"MergeEntity2\" \"MergeEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4 \"u\" 5 \"v\" 6 \"w\" 7)) )\n(create_entities (list \"MergeEntity2\") (lambda (assoc \"E\" 3 \"F\" 4 \"G\" 5 \"H\" 6)) )\n(create_entities (list \"MergeEntity2\") (lambda (assoc \"e\" 3 \"f\" 4)) )\n\n(print (total_entity_size \"MergeEntity1\"))\n(print (total_entity_size \"MergeEntity2\"))" + }, + + { + "parameter" : "flatten_entity id entity [bool include_rand_seeds] [bool parallel_create]", + "output" : "*", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to code that, if evaluated, would completely reproduce the entity specified by id, as well as all contained entities. If include_rand_seeds is true, its default, it will include all entities' random seeds. If parallel_create is true, then the creates will be performed with parallel markers as appropriate for each group of contained entities.", + "example" : "(create_entities \"FlattenTest\" (lambda\n (parallel ##a (rand) )\n))\n(let (assoc fe (flatten_entity \"FlattenTest\"))\n (print fe)\n (print (flatten_entity (call fe)))\n (print (difference_entities \"FlattenTest\" (call fe)))\n )" + }, + + { + "parameter" : "mutate_entity id entity1 [number mutation_rate] [id entity2] [assoc mutation_weights] [assoc operation_type]", + "output" : "id", + "permissions" : "e", + "new value" : "new", + "description" : "Creates a mutated version of the entity specified by entity1 like mutate. Returns the id of a new entity created contained by the entity that ran it. The value specified in mutation_rate, from 0.0 to 1.0 and defaulting to 0.00001, indicates the probability that any node will experience a mutation. Uses entity2 as the optional destination via an internal call to create_contained_entity. The parameter mutation_weights is an assoc where the keys are the allowed opcode names and the values are the probabilities that each opcode would be chosen; if null or unspecified, it defaults to all opcodes each with their own default probability. The operation_type is an assoc where the keys are mutation operations and the values are the probabilities that the operations will be performed. 
The operations can consist of the strings change_type, delete, insert, swap_elements, deep_copy_elements, and change_label.", + "example" : "(create_entities\n \"MutateEntity\"\n (lambda (list 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (assoc \"a\" 1 \"b\" 2)))\n)\n(mutate_entity \"MutateEntity\" 0.4 \"MutatedEntity\")\n(print (retrieve_entity_root \"MutatedEntity\"))" + }, + + { + "parameter" : "commonality_entities id entity1 id entity2", + "output" : "number", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to the total count of all of the nodes referenced within entity1 and entity2 that are equivalent, including all contained entities.", + "example" : "(create_entities \"e1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities \"e2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(print (commonality_entities \"e1\" \"e2\"))" + }, + + { + "parameter" : "edit_distance_entities id entity1 id entity2", + "output" : "number", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to the edit distance between all of the nodes referenced within entity1 and entity2, including all contained entities.", + "example" : "(create_entities \"e1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities \"e2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(print (edit_distance_entities \"e1\" \"e2\"))" + }, + + + { + "parameter" : "intersect_entities id entity1 id entity2 [id entity3]", + "output" : "id", + "permissions" : "e", + "new value" : "new", + "description" : "Creates an entity of whatever is common between the entities represented by entity1 and entity2 exclusive. Returns the id of a new entity created contained by the entity that ran it. Uses entity3 as the optional destination via an internal call to create_contained_entity. Any contained entities will be intersected either based on matching name or maximal similarity for nameless entities.", + "example" : "(create_entities \"e1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities \"e2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(intersect_entities \"e1\" \"e2\" \"e3\")\n(print (retrieve_entity_root \"e3\"))" + }, + + { + "parameter" : "union_entities id entity1 id entity2 [id entity3]", + "output" : "id", + "permissions" : "e", + "new value" : "new", + "description" : "Creates an entity of whatever is inclusive when merging the entities represented by entity1 and entity2. Returns the id of a new entity created contained by the entity that ran it. Uses entity3 as the optional destination via an internal call to create_contained_entity. Any contained entities will be unioned either based on matching name or maximal similarity for nameless entities.", + "example" : "(create_entities \"e1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities \"e2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(union_entities \"e1\" \"e2\" \"e3\")\n(print (retrieve_entity_root \"e3\"))" + }, + + { + "parameter" : "difference_entities id entity1 id entity2", + "output" : "*", + "permissions" : "e", + "new value" : "new", + "description" : "Finds the difference between the entities specified by entity1 and entity2 and generates code that, if evaluated passing entity1 as its parameter \"_\", would turn it into entity2 (creating or removing any contained entities as necessary). 
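That is, (call (difference_entities entity1 entity2) (assoc _ entity1)) would produce an entity equivalent to entity2, as shown in the example below.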
Useful for finding the smallest set of what needs to be changed to apply it to a new (and possibly slightly different) entity.", + "example" : "(create_entities \"DiffEntity1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities (list \"DiffEntity1\" \"DiffEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4 \"z\" 6)) )\n(create_entities (list \"DiffEntity1\" \"DiffEntityChild1\" \"DiffEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4 \"u\" 5 \"v\" 6 \"w\" 7)) )\n(create_entities (list \"DiffEntity1\" \"DiffEntityChild1\" \"DiffEntityChild2\" \"DiffEntityChild3\") (lambda (assoc \"e\" 3 \"p\" 4 \"a\" 5 \"o\" 6 \"w\" 7)) )\n(create_entities (list \"DiffEntity1\" \"OnlyIn1\") (lambda (assoc \"m\" 4)) )\n(create_entities (list \"DiffEntity1\") (lambda (assoc \"E\" 3 \"F\" 4)) )\n(create_entities (list \"DiffEntity1\") (lambda (assoc \"e\" 3 \"f\" 4 \"g\" 5 \"h\" 6)) )\n\n(create_entities \"DiffEntity2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(create_entities (list \"DiffEntity2\" \"DiffEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4 \"z\" 5)) )\n(create_entities (list \"DiffEntity2\" \"DiffEntityChild1\" \"DiffEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4 \"u\" 5 \"v\" 6 \"w\" 7)) )\n(create_entities (list \"DiffEntity2\" \"DiffEntityChild1\" \"DiffEntityChild2\" \"DiffEntityChild3\") (lambda (assoc \"e\" 3 \"p\" 4 \"a\" 5 \"o\" 6 \"w\" 7)) )\n(create_entities (list \"DiffEntity2\" \"OnlyIn2\") (lambda (assoc \"o\" 6)) )\n(create_entities (list \"DiffEntity2\") (lambda (assoc \"E\" 3 \"F\" 4 \"G\" 5 \"H\" 6)) )\n(create_entities (list \"DiffEntity2\") (lambda (assoc \"e\" 3 \"f\" 4)) )\n\n(print (contained_entities \"DiffEntity2\"))\n\n(print (difference_entities \"DiffEntity1\" \"DiffEntity2\"))\n\n(let (assoc new_entity\n (call (difference_entities \"DiffEntity1\" \"DiffEntity2\") (assoc _ \"DiffEntity1\")))\n (print new_entity)\n (print (retrieve_entity_root new_entity))\n (print (retrieve_entity_root (list new_entity \"DiffEntityChild1\")))\n (print (contained_entities new_entity))\n)\n\n(create_entities \"DiffContainer\" null)\n\n(create_entities (list \"DiffContainer\" \"DiffEntity1\") (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\" \"DiffEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4 \"z\" 6)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\" \"DiffEntityChild1\" \"DiffEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4 \"u\" 5 \"v\" 6 \"w\" 7)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\" \"DiffEntityChild1\" \"DiffEntityChild2\" \"DiffEntityChild3\") (lambda (assoc \"e\" 3 \"p\" 4 \"a\" 5 \"o\" 6 \"w\" 7)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\" \"OnlyIn1\") (lambda (assoc \"m\" 4)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\") (lambda (assoc \"E\" 3 \"F\" 4)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity1\") (lambda (assoc \"e\" 3 \"f\" 4 \"g\" 5 \"h\" 6)) )\n\n(create_entities (list \"DiffContainer\" \"DiffEntity2\") (lambda (assoc \"c\" 3 \"b\" 4)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\" \"DiffEntityChild1\") (lambda (assoc \"x\" 3 \"y\" 4 \"z\" 6)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\" \"DiffEntityChild1\" \"DiffEntityChild2\") (lambda (assoc \"p\" 3 \"q\" 4 \"u\" 5 \"v\" 6 \"w\" 7)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\" \"DiffEntityChild1\" \"DiffEntityChild2\" \"DiffEntityChild3\") (lambda (assoc \"e\" 3 \"p\" 4 \"a\" 5 \"o\" 6 \"w\" 7)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\" \"OnlyIn2\") (lambda 
(assoc \"o\" 6)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\") (lambda (assoc \"E\" 3 \"F\" 4 \"G\" 5 \"H\" 6)) )\n(create_entities (list \"DiffContainer\" \"DiffEntity2\") (lambda (assoc \"e\" 3 \"f\" 4)) )\n\n(print (difference_entities (list \"DiffContainer\" \"DiffEntity1\") (list \"DiffContainer\" \"DiffEntity2\") ))\n\n(let (assoc new_entity\n (call (difference_entities (list \"DiffContainer\" \"DiffEntity1\") (list \"DiffContainer\" \"DiffEntity2\") )\n (assoc _ (list \"DiffContainer\" \"DiffEntity1\") )))\n (print new_entity)\n (print (get_entity_code new_entity))\n (print (get_entity_code (list new_entity \"DiffEntityChild1\")))\n (print (contained_entities new_entity))\n)\n" + }, + + { + "parameter" : "mix_entities id entity1 id entity2 [number keep_chance_entity1] [number keep_chance_entity2] [number similar_mix_chance] [number chance_mix_unnamed_children] [id entity3]", + "output" : "id", + "permissions" : "e", + "new value" : "new", + "description" : "Performs a union operation on the entities represented by entity1 and entity2, but randomly ignores nodes from one or the other tree if not equal. If only keep_chance_entity1 is specified, keep_chance_entity2 defaults to 1-keep_chance_entity1. keep_chance_entity1 specifies the probability that a node from the entity represented by entity1 will be kept, and keep_chance_entity2 the probability that a node from the entity represented by entity2 will be kept. similar_mix_chance is the additional probability that two nodes will mix if they have some commonality, which will include interpolating number values based on keep_chance_node1 and keep_chance_node2, and defaults to 0.0. If similar_mix_chance is negative, then 1 minus the value will be anded with the commonality probability, so -1 means that it will never mix and 0 means it will only mix when sufficiently common. chance_mix_unnamed_children represents the probability that an unnamed entity pair will be mixed versus preserved as independent chunks, where 0.2 would yield 20% of the entities mixed. Returns the id of a new entity created contained by the entity that ran it. Uses entity3 as the optional destination via an internal call to create_contained_entity. Any contained entities will be mixed either based on matching name or maximal similarity for nameless entities.", + "example" : "(create_entities \"e1\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities \"e2\" (lambda (assoc \"c\" 3 \"b\" 4)) )\n(mix_entities \"e1\" \"e2\" 0.5 0.5 \"e3\")" + }, + + { + "parameter" : "get_entity_comments [id entity] [string label] [bool deep_comments]", + "output" : "*", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to the corresponding comments based on the parameters. If the id is specified or null is specified as the id, then it will use the current entity. If the label is null or empty string, it will retrieve comments for the entity root, otherwise if it is a valid label it will attempt to retrieve the comments for that label, null if the label doesn't exist. If deep_comments is specified and the label is a declare, then it will return an assoc with the keys being the parameters and the values being the descriptions. 
If label is empty string or null and deep_comments is true, then it will return an assoc of label to comment for each label in the entity.", + "example" : "(print (get_entity_comments))" + }, + + { + "parameter" : "retrieve_entity_root [id entity] [bool suppress_label_escapes]", + "output" : "*", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to the entity's code, looking up the entity by the id. If no id is specified or the id is null, then uses the current entity, otherwise accesses a contained entity. If suppress_label_escapes is false or omitted, will disable any labels obtained by inserting an extra # at the beginning of each.", + "example" : "(print (retrieve_entity_root))\n(print (retrieve_entity_root 1))" + }, + + { + "parameter" : "assign_entity_roots [id entity_1] * root_1 [id entity_2] [* root_2] [...]", + "output" : "bool", + "permissions" : "e", + "new value" : "new", + "description" : "Sets the code of the entity specified by each id to the corresponding root. If no id is specified, then uses the current entity, otherwise accesses a contained entity. On assigning the code to the new entity, it will enable any labels obtained by removing any extra #s from the beginning of each. If all assignments were successful, then returns true, otherwise returns false.", + "example" : "(print (assign_entity_roots (list)))" + }, + + { + "parameter" : "accum_entity_roots [id entity_1] * root_1 [id entity_2] [* root_2] [...]", + "output" : "bool", + "permissions" : "e", + "new value" : "new", + "description" : "Accumulates the corresponding root onto the code of the entity specified by each id. If no id is specified, then uses the current entity, otherwise accesses a contained entity. On assigning the code to the new entity, it will enable any labels obtained by removing any extra #s from the beginning of each. If all accumulations were successful, then returns true, otherwise returns false.", + "example" : "(create_entities \"AER_test\" (lambda (null ##a 1 ##b 2)))\n(accum_entity_roots \"AER_test\" (list ##c 3))\n(print (retrieve_entity_root \"AER_test\" 1))" + }, + + { + "parameter" : "get_entity_rand_seed id entity", + "output" : "string", + "permissions" : "e", + "new value" : "new", + "description" : "Evaluates to a string representing the current state of the random number generator for the entity specified by id used for seeding the random streams of any calls to the entity.", + "example" : "(create_entities \"RandTest\" (lambda\n (null ##a (rand) )\n ))\n(print (call_entity \"RandTest\" \"a\"))\n(print (get_entity_rand_seed \"RandTest\"))\n" + }, + + { + "parameter" : "set_entity_rand_seed [id entity] * node [bool deep]", + "output" : "string", + "permissions" : "e", + "description" : "Sets the random number seed and state for the random number generator of the specified entity, or the current entity if not specified, to the state specified by node. If node is already a string in the proper format output by get_entity_rand_seed, then it will set the random generator to that current state, picking up where the previous state left off. If it is anything else, it uses the value as a random seed to start the generator. Note that this will not affect the state of the current random number stream, only future random streams created by the entity for new calls. 
The parameter deep defaults to false, but if it is true, all contained entities are recursively set with random seeds based on the specified random seed and a hash of their relative id path to the entity being set.", + "example" : "(create_entities \"RandTest\" (lambda\n (null ##a (rand) )\n ) )\n(create_entities (list \"RandTest\" \"DeepRand\") (lambda\n (null ##a (rand) )\n ) )\n(declare (assoc seed (get_entity_rand_seed \"RandTest\")))\n(print (call_entity \"RandTest\" \"a\"))\n(set_entity_rand_seed \"RandTest\" 1234)\n(print (call_entity \"RandTest\" \"a\"))" + }, + + { + "parameter" : "get_entity_root_permission id entity", + "output" : "number", + "permissions" : "r", + "description" : "Returns true if the entity has root permissions, false if not. Will return null if the caller is not root.", + "example" : " (create_entities \"RootTest\" (lambda (print (system_time)) ))\n(print (get_entity_root_permission \"RootTest\"))" + }, + + { + "parameter" : "set_entity_root_permission id entity bool permission", + "output" : "id", + "permissions" : "r", + "description" : "Sets the root permission on the entity specified by id. If permission is true, then it grants root permissions; if it is false, then it removes them. Returns the id of the entity. Can only be called by an entity with root permissions.", + "example" : "(create_entities \"RootTest\" (lambda (print (system_time)) ))\n(set_entity_root_permission \"RootTest\" (true))\n(call_entity \"RootTest\")" + }, + + { + "parameter" : "create_entities [id entity_1] * node_1 [id entity_2] [* node_2] [...]", + "output" : "list of id", + "permissions" : "e", + "new value" : "new", + "description" : "Creates a new entity with code specified by node. Uses the optional entity location specified by the id, ignored if null or invalid. Evaluates to a list of all of the new entities' ids, with null in place of any id it was unable to create. If the entity does not have permission to create the entities, it will evaluate to null. If the id is omitted, then it will create the new entity in the calling entity. If id specifies an existing entity, then it will create the new entity within that existing entity. If the last id in the path is not an existing entity, then it will attempt to create that entity (returning null if it cannot). Can only be performed by an entity that contains the destination specified by id. Will automatically remove a # from the beginning of each label in case the label had been disabled. Unlike the rest of the entity creation commands, create_entities specifies the optional id first to make it easy to read entity definitions. If more than 2 parameters are specified, create_entities will iterate through all of the pairs of parameters, treating them like the first two as it creates new entities.", + "example" : "(print (create_entities \"MyLibrary\" (lambda (+ #three 3 4)) ) )\n\n(create_entities \"EntityWithChildren\" (lambda (assoc \"a\" 3 \"b\" 4)) )\n(create_entities (list \"EntityWithChildren\" \"Child1\") (lambda (assoc \"x\" 3 \"y\" 4)) )\n(create_entities (list \"EntityWithChildren\" \"Child2\") (lambda (assoc \"p\" 3 \"q\" 4)) )\n(print (contained_entities \"EntityWithChildren\"))" + }, + + { + "parameter" : "clone_entities id source_entity_1 [id destination_entity_1] [id source_entity_2] [id destination_entity_2] [...]", + "output" : "list of id", + "permissions" : "e", + "new value" : "new", + "description" : "Creates a clone of source_entity_1. 
If destination_entity_1 is not specified, then it clones the entity into the current entity. If destination_entity_1 is specified, then it clones it into the location specified by destination_entity_1; if destination_entity_1 is an existing entity, then it will create it within that entity; if not, it will attempt to create it with the given id. Evaluates to the id of the new entity. Can only be performed by an entity that contains both source_entity_1 and the specified path of destination_entity_1. If multiple entities are specified, it will clone each from the source to the destination. Evaluates to a list of the new entity ids.", + "example" : "(print (create_entities \"MyLibrary\" (lambda (+ #three 3 4)) ) )\n(print (clone_entities \"MyLibrary\" \"MyNewLibrary\"))" + }, + + { + "parameter" : "move_entities id source_entity_1 [id destination_entity_1] [id source_entity_2] [id destination_entity_2] [...]", + "output" : "list of id", + "permissions" : "e", + "new value" : "new", + "description" : "Moves the entity from the location specified by source_entity_1 to the destination destination_entity_1. If destination_entity_1 exists, it will move source_entity_1 using source_entity_1's current id into destination_entity_1. If destination_entity_1 does not exist, then it will move source_entity_1 and rename it to the end of the id specified in destination_entity_1. Can only be performed by a containing entity relative to both ids. If multiple entities are specified, it will move each from the source to the destination. Evaluates to a list of the new entity ids.", + "example" : "(print (create_entities \"MyLibrary\" (lambda (+ #three 3 4)) ) )\n(print (move_entities \"MyLibrary\" \"MyLibrary2\"))" + }, + + { + "parameter" : "destroy_entities [id entity_1] [id entity_2] [...]", + "output" : "bool", + "permissions" : "e", + "new value" : "new", + "description" : "Destroys the entities specified by the ids entity_1, entity_2, etc. Can only be performed by a containing entity. Returns true if all entities were successfully destroyed, false if not, whether due to the entities not existing in the first place or due to code currently being run in them.", + "example" : "(print (create_entities \"MyLibrary\" (lambda (+ #three 3 4)) ) )\n(print (contained_entities))\n(destroy_entities \"MyLibrary\")\n(print (contained_entities))" + }, + + { + "parameter" : "load string file_path [bool escape_filename] [string file_type]", + "output" : "*", + "permissions" : "r", + "description" : "Loads the data specified by the resource in string. Attempts to load the file type and parse it into appropriate data and evaluate to the corresponding code. The parameter escape_filename defaults to false, but if it is true, it will aggressively escape filenames using only alphanumeric characters and the underscore, using underscore as an escape character. If file_type is specified and not null, it will use the file_type specified instead of the extension of the file_path. File formats supported are amlg, json, yaml, csv, cstl, and caml; anything not in this list will be loaded as a binary string. Note that loading from a non-'.amlg' extension will only ever provide lists, assocs, numbers, and strings.", + "example" : "(print (load \"my_directory/MyModule.amlg\"))" + }, + + { + "parameter" : "load_entity string file_path [id entity] [bool escape_filename] [bool escape_contained_filenames] [string file_type]", + "output" : "id", + "permissions" : "r", + "description" : "Loads an entity specified by the resource in string. 
Attempts to load the file type and parse it into appropriate data and store it in the entity specified by id, following the same id creation rules as create_entities, except that if no id is specified, it may default to a name based on the resource if available. The parameter escape_filename defaults to false, but if it is true, it will aggressively escape filenames using only alphanumeric characters and the underscore, using underscore as an escape character. If escape_contained_filenames is true, which is its default, it will also escape contained entity filenames. If file_type is specified and not null, it will use the file_type specified instead of the extension of the file_path. File formats supported are amlg, json, yaml, csv, cstl, and caml; anything not in this list will be loaded as a binary string. Note that loading from a non-'.amlg' extension will only ever provide lists, assocs, numbers, and strings.", + "example" : "(load_entity \"my_directory/MyModule.amlg\" \"MyModule\")" + }, + + { + "parameter" : "load_persistent_entity string file_path [id entity] [bool escape_filename]", + "output" : "id", + "permissions" : "r", + "description" : "Loads an entity specified by the resource in string. Attempts to load the file type and parse it into appropriate data and store it in the entity specified by id, following the same id creation rules as create_entities. Any modifications to the entity or any entity contained within it will be written out to the resource, so that the memory and persistent storage are synchronized. The parameter escape_filename defaults to false, but if it is true, it will aggressively escape filenames using only alphanumeric characters and the underscore, using underscore as an escape character. This command will escape contained filenames. The file type of a persisted entity must match the extension of the file of the main entity. File formats supported are amlg, json, yaml, csv, cstl, and caml; anything not in this list will be loaded as a binary string. Note that loading from a non-'.amlg' extension will only ever provide lists, assocs, numbers, and strings.\n\nWARNING: Loading the same file as a persistent entity in more than one place will overwrite the file each time either entity is altered, but changes will not be propagated between the entities.", + "example" : "(load_persistent_entity \"my_directory/MyModule.amlg\" \"MyModule\")" + }, + + { + "parameter" : "store string file_path * node [bool escape_filename] [string file_type] [assoc params]", + "output" : "bool", + "permissions" : "r", + "description" : "Stores the code specified by * to the resource in string. Returns true if successful, false if not. The parameter escape_filename defaults to false, but if it is true, it will aggressively escape filenames using only alphanumeric characters and the underscore, using underscore as an escape character. If file_type is specified and not null, it will use the file_type specified instead of the extension of the file_path. File formats supported are amlg, json, yaml, csv, cstl, and caml; anything not in this list will be stored as a binary string. Note that storing to a non-'.amlg' extension will only ever preserve lists, assocs, numbers, and strings. If params is specified, it is an assoc that contains key-value pairs describing the format. 
The key \"sort_keys\" can be used to specify a boolean value, if true, then it will sort the keys, otherwise the default behavior is to emit the keys based on memory layout.", + "example" : "(store \"my_directory/MyData.amlg\" (list 1 2 3))" + }, + + { + "parameter" : "store_entity string file_path id entity [bool escape_filename] [bool escape_contained_filenames] [string file_type] [assoc params]", + "output" : "bool", + "permissions" : "r", + "description" : "Stores the entity specified by the id to the resource in string. Returns true if successful, false if not. The parameter escape_filename defaults to false, but if it is true, it will agressively escape filenames using only alphanumeric characters and the underscore, using underscore as an escape character. If escape_contained_filenames is true, which is its default, it will also escape contained entity filenames. If file_type is specified and not null, it will use the file_type specified instead of the extension of the file_path. File formats supported are amlg, json, yaml, csv, cstl, and caml; anything not in this list will be loaded as a binary string. Note that loading from a non-'.amlg' extension will only ever provide lists, assocs, numbers, and strings. If params is specified, it is an assoc that contains key-value pairs describing the format. The key \"sort_keys\" can be used to specify a boolean value, if true, then it will sort the keys, otherwise the default behavior is to emit the keys based on memory layout.", + "example" : "(store_entity \"my_directory/MyData.amlg\" \"MyData\")" + }, + + { + "parameter" : "contains_entity id entity", + "output" : "bool", + "permissions" : "e", + "new value" : "new", + "description" : "Returns true if the referred to entity specified by id exists.", + "example" : "(print (create_entities \"MyLibrary\" (lambda (+ #three 3 4)) ) )\n(print (contains_entity \"MyLibrary\"))\n(print (contains_entity (list \"MyLibrary\")))" + }, + + { + "parameter" : "contained_entities [id containing_entity] [list conditions]", + "output" : "list of string", + "permissions" : "e", + "new value" : "new", + "description" : "Returns a list of strings of ids of entities contained in the entity specified by id or current entity if id is ommitted. The optional list is a conjunction of conditions that are required in order for a contained entity to be returned. The conditions are all of the commands that begin with query_.", + "example" : "(create_entities (list \"TestEntity\" \"Child\")\n (lambda (null ##TargetLabel 3))\n) \n\n (contained_entities \"TestEntity\" (list\n (query_exists \"TargetLabel\")\n)) \n\n ; For more examples see the individual entries for each query." + }, + + { + "parameter" : "compute_on_contained_entities [id containing_entity] [list conditions]", + "output" : "*", + "permissions" : "e", + "new value" : "new", + "description" : "Performs queries like contained_entities but returns a value or set of values appropriate for the last query in conditions. The parameter conditions is a conjunction of conditions that are required in order for the final query to be evaluated. Each entity in the list is a query. The conditions are all of the commands that begin with query_. 
If the last query does not return anything, then it will just return the matching entities.", + "example" : "(create_entities (list \"TestEntity\" \"Child\")\n (lambda (null ##TargetLabel 3))\n) \n\n (compute_on_contained_entities \"TestEntity\" (list\n (query_exists \"TargetLabel\")\n)) \n\n ; For more examples see the individual entries for each query." + }, + + { + "parameter" : "query_count", + "output" : "query", + "new value" : "new", + "description" : "When used as a compute_on_contained_entities argument, counts the number of entities that match the criteria and returns the number.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_count)\n))" + }, + + { + "parameter" : "query_select number num_to_select [number start_offset] [number random_seed]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects num_to_select entities sorted by entity id. If start_offset is specified, then it will return num_to_select starting that far in, and subsequent calls can be used to get all entities in batches. If random_seed is specified, then it will select num_to_select entities randomly from the list based on the random seed. If random_seed is specified and start_offset is null, then it will not guarantee a position in the order for subsequent calls that specify start_offset, and will execute more quickly.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_select 4 (null) (rand))\n))" + }, + + { + "parameter" : "query_sample number num_to_select [number random_seed]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects a random sample of num_to_select entities sorted by entity_id with replacement. If random_seed is specified, then it will select num_to_select entities randomly from the list based on the random seed. If random_seed is not specified then the subsequent calls will return the same sample of entities.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_sample 4 (rand))\n))" + }, + + { + "parameter" : "query_weighted_sample string weight_label_name number num_to_select [number random_seed]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects a random sample of num_to_select entities sorted by entity_id with replacement. It will use weight_label_name as the feature containing the weights for the sampling, which will be normalized prior to sampling. Non-numbers and negative infinite values will be ignored, and if there are any infinite values, those will be selected from uniformly. If random_seed is specified, then it will select num_to_select entities randomly from the list based on the random seed. If random_seed is not specified then the subsequent calls will return the same sample of entities.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_weighted_sample \"weight\" 4 (rand))\n))" + }, + + { + "parameter" : "query_in_entity_list list list_of_entity_ids", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects only the entities in list_of_entity_ids. 
It can be used to filter results before doing subsequent queries.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_in_entity_list (list \"Entity1\" \"Entity2\"))\n))" + }, + + { + "parameter" : "query_not_in_entity_list list list_of_entity_ids", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, filters out the entities in list_of_entity_ids. It can be used to filter results before doing subsequent queries.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_not_in_entity_list (list \"Entity1\" \"Entity2\"))\n))" + }, + + { + "parameter" : "query_exists string label_name", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities which have the named label. If called last with compute_on_contained_entities, then it returns an assoc of entity ids, where each value is an assoc of corresponding label names and values.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_exists \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_not_exists string label_name", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities which do not have the named label.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_not_exists \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_equals string label_name * node_value", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label is equal to the specified *.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_equals \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_not_equals string label_name * node_value", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label is not equal to the specified *.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_not_equals \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_between string label_name * lower_bound * upper_bound", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label has a value between the specified lower_bound and upper_bound.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_between \"TargetLabel\" 2 5)\n)) \n\n (contained_entities \"TestEntity\" (list\n (query_between \"x\" -4 5)\n (query_between \"y\" -4 0)\n))" + }, + + { + "parameter" : "query_not_between string label_name * lower_bound * upper_bound", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label has a value outside the specified lower_bound and upper_bound.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_not_between \"TargetLabel\" 2 5)\n)) \n\n (contained_entities \"TestEntity\" (list\n (query_not_between \"x\" -4 5)\n (query_not_between \"y\" -4 0)\n))" + }, + + { + "parameter" : "query_among string label_name list values", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label has one of the values specified in values.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_among \"TargetLabel\" (list 2 5))\n)) \n\n (contained_entities \"TestEntity\" (list\n (query_among \"x\" (list -4 5))\n (query_among \"y\" (list -4 
0))\n))" + }, + + { + "parameter" : "query_not_among string label_name list values", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities for which the specified label does not have one of the values specified in values.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_not_among \"TargetLabel\" (list 2 5))\n)) \n\n (contained_entities \"TestEntity\" (list\n (query_not_among \"x\" (list -4 5))\n (query_not_among \"y\" (list -4 0))\n))" + }, + + { + "parameter" : "query_max string label_name [number entities_returned] [bool numeric]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects a number of entities with the highest values in the specified label. If entities_returned is specified, it will return that many entities, otherwise will return 1. If numeric is true, its default value, then it only considers numeric values; if false, will consider all types.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_max \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_min string label_name [number entities_returned] [bool numeric]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects a number of entities with the lowest values in the specified label. If entities_returned is specified, it will return that many entities, otherwise will return 1. If numeric is true, its default value, then it only considers numeric values; if false, will consider all types.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_min \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_sum string label_name [string weight_label_name]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, returns the sum of the values of the specified label across all entities. If weight_label_name is specified, it will find the weighted sum, which is the same as a dot product.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_sum \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_mode string label_name [string weight_label_name] [bool numeric]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, finds the statistical mode of label_name for numerical data. If weight_label_name is specified, it will find the weighted mode. If numeric is true, its default, then it will treat all values as numeric, otherwise it will treat them all as strings. If numeric and no numeric mode exists, it will return .nan, but if string and no string mode exists, it will return null.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_mode \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_quantile string label_name [number q] [string weight_label_name]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, finds the statistical quantile of label_name for numerical data, using q as the parameter to the quantile (default 0.5, median). 
If weight_label_name is specified, it will find the weighted quantile, otherwise weight is 1.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_quantile \"TargetLabel\" 0.75)\n))" + }, + + { + "parameter" : "query_generalized_mean string label_name number p [string weight_label_name] [number center] [bool calculate_moment] [bool absolute_value]", + "output" : "query", + "new value" : "new", + "description": "When used as a query argument, computes the generalized mean over the label_name for numeric data, using p as the parameter to the generalized mean. If weight_label_name is specified, it will compute a weighted mean, normalizing the values of contained by weight_label_name. If center is specified, calculations will use that as central point, default is 0.0. If calculate_moment is true, results will not be raised to 1/p for p>=1. If absolute_value is true, the first order mean (p=1) will take the absolute value.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_generalized_mean \"TargetLabel\" 0.5)\n))" + }, + + { + "parameter" : "query_min_difference string label_name [number cyclic_range] [bool include_zero_difference]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, finds the smallest difference between any two values for the specified label. If cyclic_range is null, the default value, then it will assume the values are not cyclic; if it is a number, then it will assume the range is from 0 to cyclic_range. If include_zero_difference is true, its default value, then it will return 0 if the smallest gap between any two numbers is 0; if false, it will return the smallest nonzero value.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_min_difference \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_max_difference string label_name [number cyclic_range]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, finds the largest difference between any two values for the specified label. If cyclic_range is null, the default value, then it will assume the values are not cyclic; if it is a number, then it will assume the range is from 0 to cyclic_range.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_max_difference \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_value_masses string label_name [string weight_label_name] [bool numeric]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, computes the counts for each value of the label and returns an assoc with the keys being the label values and the values being the counts or weights of the values. If weight_label_name is specified, then it will accumulate that weight for each value, otherwise it will use a weight of 1 for each yielding a count. 
If numeric is true, its default, then it will treat all values as numeric, otherwise it will treat them all as strings.", + "example" : "(compute_on_contained_entities \"TestEntity\" (list\n (query_value_masses \"TargetLabel\")\n))" + }, + + { + "parameter" : "query_less_or_equal_to string label_name * max_value", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities with a value in the specified label less than or equal to the specified *.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_less_or_equal_to \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_greater_or_equal_to string label_name * min_value", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities with a value in the specified label greater than or equal to the specified *.", + "example" : "(contained_entities \"TestEntity\" (list\n (query_greater_or_equal_to \"TargetLabel\" 3)\n))" + }, + + { + "parameter" : "query_within_generalized_distance number max_distance list axis_labels list axis_values list|assoc|number weights list|assoc distance_types list|assoc attributes list|assoc|number deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [* output_sorted_list]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values the specifies the corresponding values for the point to test from. p_value is the generalized norm parameter. weights is a list or assoc of dimension weights to use for the query, each value mapping to its respective element in the vectors. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). For attributes, the particular distance_types specifies what is expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values available. For continuous, a null means unbounded where distance for a null will be computed automatically from the relevant data; a single number indicates the difference between a value and a null, a specified uncertainty. Cyclic requires either a single value or a list of two values; a list of two values indicates that the first value, the lower bound, will wrap around to the upper bound, the second value specified; if only a single number is provided instead of a list, then it will assume that number for the upper bound and 0 for the lower bound. For the string distance type, the value specified can be a number indicating the maximum possible string length, inferred if null is provided. 
For code, the value specified can be a number indicating the maximum number of nodes in the code (including labels), inferred if null is provided. Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. max_distance is the maximum distance allowed. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. If these distances are returned, then a transform may be applied to them based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities before being returned. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values for each respective entity.", + "example" : "(contained_entities \"TestContainerExec\" (list\n (query_within_generalized_distance 60 (list \"x\" \"y\") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) \"random seed 1234\" \"radius\")\n))" + }, + + { + "parameter" : "query_nearest_generalized_distance number entities_returned list axis_labels list axis_values list|assoc weights list|assoc distance_types list|assoc attributes list|assoc deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [* output_sorted_list]", + "output" : "query", + "new value" : "new", + "description" : "When used as a query argument, selects the closest entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values the specifies the corresponding values for the point to test from. p_value is the generalized norm parameter. weights is a list or assoc of dimension weights to use for the query, each value mapping to its respective element in the vectors. 
If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. entities_returned specifies the number of entities to return. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. If these distances are returned, then a transform may be applied to them based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities before being returned. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). 
If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values for each respective entity.", + "example" : "(contained_entities \"TestContainerExec\" (list\n (query_nearest_generalized_distance (list \"x\" \"y\") (list 0.0 0.0) 0.5 (list 0.25 0.75) (list 5 0) (list null (list 0 360)) (list 0.5 0.0) 10 \"radius\")\n))\n(contained_entities \"TestContainerExec\" (list\n (query_nearest_generalized_distance (list \"x\" \"y\") (list 0.0 0.0) 0.5 (null) (null) 10 \"radius\")\n))" + }, + + { + "parameter" : "compute_entity_convictions number entities_returned list feature_labels list entity_ids_to_compute list|assoc weights list|assoc distance_types list|assoc attributes list|assoc deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [bool conviction_of_removal] [* output_sorted_list]", + "output" : "query", + "new value" : "new", + "concurrency" : true, + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null/emptylist, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. p_value is the generalized norm parameter. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. entities_returned specifies the number of entities to return. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). 
The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values for each respective entity.", + "example" : "(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_convictions (list \"feature_1\" \"feature_2\") (list entity_id_1 entity_id_2 entity_id 3) 1.0 (list 0.25 0.75) (list 5 0) (list null (list 0 360)) (list 0.5 0.0) 10 \"radius\")\n))\n(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_convictions (list \"x\" \"y\") (null) 2.0 (null) (null) 10 \"radius\")\n))" + }, + + { + "parameter" : "compute_entity_group_kl_divergence number entities_returned list feature_labels list entity_ids_to_compute list|assoc weights list|assoc distance_types list|assoc attributes list|assoc deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [bool conviction_of_removal]", + "output" : "query", + "new value" : "new", + "concurrency" : true, + "description" : "When used as a query argument, computes the case kl divergence for every case given in case_ids_to_compute as a group with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null/emptylist, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. p_value is the generalized norm parameter. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. 
The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. entities_returned specifies the number of entities to return. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. 
If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included.", + "example" : "(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_group_kl_divergence (list \"feature_1\" \"feature_2\") (list entity_id_1 entity_id_2 entity_id 3) 1.0 (list 0.25 0.75) (list 5 0) (list null (list 0 360)) (list 0.5 0.0) 10 \"radius\")\n))\n(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_group_kl_divergence (list \"x\" \"y\") (null) 2.0 (null) (null) 10 \"radius\")\n))" + }, + + { + "parameter" : "compute_entity_distance_contributions number entities_returned list feature_labels list entity_ids_to_compute list|assoc weights list|assoc list|assoc distance_types list|assoc attributes list|assoc deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [* output_sorted_list]", + "output" : "query", + "new value" : "new", + "concurrency" : true, + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null/emptylist, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. p_value is the generalized norm parameter. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. entities_returned specifies the number of entities to return. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). 
The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values for each respective entity.", + "example" : "(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_distance_contributions (list \"feature_1\" \"feature_2\") (list entity_id_1 entity_id_2 entity_id 3) 1.0 (list 0.25 0.75) (list 5 0) (list null (list 0 360)) (list 0.5 0.0) 10 \"radius\")\n))\n(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_distance_contributions (list \"x\" \"y\") (null) 2.0 (null) (null) 10 \"radius\")\n))" + }, + + { + "parameter" : "compute_entity_kl_divergences number entities_returned list feature_labels list entity_ids_to_compute list|assoc weights list|assoc distance_types list|assoc attributes list|assoc deviations [number p_value] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [bool conviction_of_removal] [* output_sorted_list]", + "output" : "query", + "new value" : "new", + "concurrency" : true, + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null/emptylist, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. p_value is the generalized norm parameter. If weights is null, then it will assume that the weights are 1 and additionally will ignore null values for the vectors instead of treating them as unknown differences. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. 
Allowed values are \"nominal\" (checks for exact matches), \"continuous\" (takes the numeric difference between two values), \"cyclic\" (takes the numeric difference where the min and max wrap around), \"string\" (computes the edit distance between strings), and \"code\" (computes the edit distance between trees or graphs of code). \nFor attributes, the particular distance_types specifies what particular attributes are expected. In all cases, there is the option to specify a list of values, where the second last value is the difference to use when one of the values being compared is null, and the last value is the difference to use when both of the values are null. If the last value is omitted, it will use the second last value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. entities_returned specifies the number of entities to return. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precison, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal_to_prob\" then distances will be assumed to be surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). 
If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values for each respective entity.", + "example" : "(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_kl_divergences (list \"feature_1\" \"feature_2\") (list entity_id_1 entity_id_2 entity_id 3) 1.0 (list 0.25 0.75) (list 5 0) (list null (list 0 360)) (list 0.5 0.0) 10 \"radius\")\n))\n(compute_on_contained_entities \"TestContainerExec\" (list\n (compute_entity_kl_divergences (list \"x\" \"y\") (null) 2.0 (null) (null) 10 \"radius\")\n))" + }, + + { + "parameter" : "contains_label [id entity] string label_name", + "output" : "bool", + "new value" : "new", + "description" : "Evaluates to true if the label represented by string exists for the entity specified by id for a contained entity. If id is omitted, then it uses the current entity.", + "example" : "(print (contains_label \"MyEntity\" \"some_label\"))" + }, + + { + "parameter" : "assign_to_entities [id entity_1] assoc variable_value_pairs_1 [id entity_2] [assoc variable_value_pairs_2] [...]", + "output" : "bool", + "permissions" : "e", + "description" : "For each index-value pair of variable_value_pairs, assigns the value to the labeled variable on the contained entity represented by the respective entity, itself if no id specified, while retaining the original labels. If none found, it will not cause an assignment. When the value is assigned, any labels will be cleared out and the root of the value will be assigned the comments and labels of the previous root at the label. Will perform an assignment for each of the entities referenced, returning (true) if all assignments were successful, (false) if not.", + "example" : "(null #asgn_test1 12)\n(assign_to_entities (assoc asgn_test1 4))\n(print (retrieve_from_entity \"asgn_test1\"))\n\n" + }, + + { + "parameter" : "accum_to_entities [id entity_1] assoc variable_value_pairs_1 [id entity_2] [assoc variable_value_pairs_2] [...]", + "output" : "bool", + "permissions" : "e", + "description" : "For each index-value pair of assoc, retrieves the labeled variable from the respective entity, accumulates it by the corresponding value in variable_value_pairs, then assigns the value to the labeled variable on the contained entity represented by the id, itself if no id specified, while retaining the original labels. If none found, it will not cause an assignment. When the value is assigned, any labels will be cleared out and the root of the value will be assigned the comments and labels of the previous root at the label. Accumulation is performed differently based on the type: for numeric values it adds, for strings, it concatenates, for lists it appends, and for assocs it appends based on the pair. 
Will perform an accumulation for each of the entities referenced, returning (true) if all assignments were successful, (false) if not.", + "example" : "(null #asgn_test1 12)\n(accum_to_entities (assoc asgn_test1 4))\n(print (retrieve_from_entity \"asgn_test1\"))\n\n" + }, + + { + "parameter" : "direct_assign_to_entities [id entity_1] assoc variable_value_pairs_1 [id entity_2] [assoc variable_value_pairs_2] [...]", + "output" : "bool", + "permissions" : "e", + "description" : "Like assign_to_entities, except retains any/all labels, comments, etc.", + "example" : "(create_entities \"DRFE\" (lambda (null ##a 12)) )\n(print (direct_retrieve_from_entity \"DRFE\" \"a\"))\n(print (direct_assign_to_entities \"DRFE\" (assoc a 7)))\n(print (direct_retrieve_from_entity \"DRFE\" \"a\"))" + }, + + { + "parameter" : "retrieve_from_entity [id entity] [string|list|assoc label_names]", + "output" : "*", + "permissions" : "e", + "description" : "If string specified, returns the value of the contained entity id, itself if no id specified, at the label specified by the string. If list specified, returns a list of the values of the contained entity id, itself if no id specified, at the labels specified by each element of the list interpreted as a string label. If assoc specified, returns an assoc with the indices of the assoc passed in, with the values being the values of the contained entity id, itself if no id specified, at the label represented by each index.", + "example" : "(null #asgn_test1 12)\n(assign_to_entities (assoc asgn_test1 4))\n(print (retrieve_from_entity \"asgn_test1\"))\n\n(null #asgn_test2 12)\n(assign_to_entities (assoc asgn_test2 4))\n(print (retrieve_from_entity \"asgn_test2\"))\n(create_entities \"RCT\" (lambda (null ##a 12 ##b 13)) )\n(print (retrieve_from_entity \"RCT\" \"a\"))\n(print (retrieve_from_entity \"RCT\" (list \"a\" \"b\") ))\n(print (retrieve_from_entity \"RCT\" (zip (list \"a\" \"b\") null) ))\n" + }, + + { + "parameter" : "direct_retrieve_from_entity [id entity] [string|list|assoc label_names]", + "output" : "*", + "permissions" : "e", + "description" : "Like retrieve_from_entity, except retains labels.", + "example" : "(create_entities \"DRFE\" (lambda (null ##a 12)) )\n(print (direct_retrieve_from_entity \"DRFE\" \"a\"))\n(print (direct_assign_to_entities \"DRFE\" (assoc a 7)))\n(print (direct_retrieve_from_entity \"DRFE\" \"a\"))" + }, + + { + "parameter" : "call_entity id entity [string label_name] [assoc arguments] [number operation_limit] [number max_node_allocations]", + "output" : "*", + "permissions" : "e", + "new scope" : true, + "description" : "Calls the contained entity specified by id, using the entity as the new entity context. It will evaluate to the return value of the call, null if not found. If string is specified, then it will call the label specified by string. If assoc is specified, then it will pass assoc as the arguments on the scope stack. If operation_limit is specified, it represents the number of operations that are allowed to be performed. If operation_limit is 0 or infinite, then an infinite number of operations will be allotted to the entity, but only if its containing entity (the current entity) has infinite operations. The root entity has infinite computing cycles. If max_node_allocations is specified, it represents the maximum number of nodes that are allowed to be allocated, limiting the total memory.
If max_node_allocations is 0 or infinite, then there is no limit to the number of nodes to be allotted to the entity as long as the machine has sufficient memory, but only if the containing entity (the current entity) has unlimited memory access. The execution performed will use a random number stream created from the entity's random number stream.", + "example" : "(create_entities \"TestContainerExec\"\n (lambda (parallel\n ##d (print \"hello \" x)\n )) \n)\n\n(print (call_entity \"TestContainerExec\" \"d\" (assoc x \"goodbye\")))" + }, + + { + "parameter" : "call_entity_get_changes id entity [string label_name] [assoc arguments] [number operation_limit] [number max_node_allocations]", + "output" : "list of *1 *2", + "permissions" : "e", + "new scope" : true, + "description" : "Like call_entity returning the value in *1. However, it also returns a list of direct_assign_to_entities calls with respective data in *2, holding a log of all of the changes that have elapsed. The log may be evaluated to apply or re-apply the changes to any id passed in to the log as _.", + "example" : "(create_entities \"CEGCTest\" (lambda\n (null ##a_assign\n (seq \n (create_entities \"Contained\" (lambda\n (null ##a 4 )\n ))\n (print (retrieve_from_entity \"Contained\" \"a\") )\n (assign_to_entities \"Contained\" (assoc a 6) )\n (print (retrieve_from_entity \"Contained\" \"a\") )\n (set_entity_rand_seed \"Contained\" \"bbbb\")\n (destroy_entities \"Contained\")\n )\n )\n))\n\n(print (call_entity_get_changes \"CEGCTest\" \"a_assign\"))\n" + }, + + { + "parameter" : "call_container string parent_label_name [assoc arguments] [number operation_limit] [number max_node_allocations]", + "output" : "*", + "new scope" : true, + "description" : "Attempts to call the container associated with the label specified by string prepended by a caret (^); the caret indicates that the label is allowed to be accessed by contained entities. It will evaluate to the return value of the call, null if not found. The call is made on the label specified by string. If assoc is specified, then it will pass assoc as the arguments on the scope stack. The parameter accessing_entity will automatically be set to the id of the caller, regardless of the arguments. If operation_limit is specified, it represents the number of operations that are allowed to be performed. If operation_limit is 0 or infinite, then an infinite of operations will be allotted to the entity, but only if its containing entity (the current entity) has infinite operations. The root entity has infinite computing cycles. If max_node_allocations is specified, it represents the maximum number of nodes that are allowed to be allocated, limiting the total memory. If max_node_allocations is 0 or infinite, then there is no limit to the number of nodes to be allotted to the entity as long as the machine has sufficient memory, but only if the containing entity (the current entity) has unlimited memory access. The execution performed will use a random number stream created from the entity's random number stream.", + "permissions" : "e", + "example" : "(create_entities \"TestContainerExec\"\n (lambda (parallel\n ##^a 3\n ##b (contained_entities)\n ##c (+ x 1)\n ##d (call_entity \"TCEc\" \"q\" (assoc x x))\n ##x 4\n ##y 5\n )) \n)\n(create_entities (list \"TestContainerExec\" \"TCEc\")\n (lambda (parallel\n ##p 3\n ##q (+ x (call_container \"a\"))\n ##bar \"foo\"\n ))\n)\n\n(print (call_entity \"TestContainerExec\" \"d\" (assoc x 4)))" + } +]; + +// Help Node out by setting up define. 
+if (typeof exports === 'object' && typeof define !== 'function') { + define = function (factory) { + factory(require, exports, module); + }; +} + +if (typeof define === 'function') { + define(function (require, exports, module) { + exports.language = data; + }); +} diff --git a/docs/launch.example.json b/docs/launch.example.json new file mode 100644 index 00000000..ad472e66 --- /dev/null +++ b/docs/launch.example.json @@ -0,0 +1,26 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "debug-windows", + "type": "cppvsdbg", + "request": "launch", + "program": "${command:cmake.launchTargetPath}", + "args": [ "amlg_code/full_test.amlg" ], + "cwd": "${workspaceFolder}/Amalgam/", + "stopAtEntry": false + }, + { + "name": "debug-linux", + "type": "cppdbg", + "request": "launch", + "program": "${command:cmake.launchTargetPath}", + "args": [ "amlg_code/full_test.amlg" ], + "cwd": "${workspaceFolder}/Amalgam/", + "stopAtEntry": false + } + ] +} \ No newline at end of file diff --git a/docs/launch.vs.example.json b/docs/launch.vs.example.json new file mode 100644 index 00000000..ec7c6ef5 --- /dev/null +++ b/docs/launch.vs.example.json @@ -0,0 +1,52 @@ +{ + "version": "0.2.1", + "defaults": {}, + "configurations": [ + { + "type": "exe", + "project": "CMakeLists.txt", + "projectTarget": "amalgam-mt.exe", + "name": "amalgam-mt.exe", + "currentDir": "${workspaceRoot}/Amalgam/", + "args": [ + "amlg_code/full_test.amlg" + ] + }, + { + "type": "exe", + "project": "CMakeLists.txt", + "projectTarget": "amalgam-st.exe", + "name": "amalgam-st.exe", + "currentDir": "${workspaceRoot}/Amalgam/", + "args": [ + "amlg_code/full_test.amlg" + ] + }, + { + "type": "cppgdb", + "name": "amalgam-mt", + "project": "CMakeLists.txt", + "projectTarget": "amalgam-mt", + "debuggerConfiguration": "gdb", + "cwd": "Amalgam/", + "args": [ + "amlg_code/full_test.amlg" + ], + "comment": "Learn how to configure WSL debugging. For more info, see http://aka.ms/vslinuxdebug", + "env": {} + }, + { + "type": "cppgdb", + "name": "amalgam-st", + "project": "CMakeLists.txt", + "projectTarget": "amalgam-st", + "debuggerConfiguration": "gdb", + "cwd": "Amalgam/", + "args": [ + "amlg_code/full_test.amlg" + ], + "comment": "Learn how to configure WSL debugging. For more info, see http://aka.ms/vslinuxdebug", + "env": {} + } + ] +} \ No newline at end of file diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 00000000..c80ed2f2 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,15 @@ +# Amalgam Examples + +A collection of notable and neat/fun examples written in Amalgam located in directories in this directory. 
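+
+To run one of these examples directly (a minimal sketch; it assumes an `amalgam` interpreter built from this repository is available on your `PATH` -- substitute `amalgam-st`, `amalgam-mt`, or the path to your own build as appropriate):
+
+```sh
+# from the repository root, run the hello world example
+amalgam examples/hello_world/hello_world.amlg
+```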
+ +## Useful + +* API Discovery example using a simple implementation of an API in Amalgam +* JSON searching + +## Neat/fun + +* [Conway's Game of Life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life) +* [Fractals](https://en.wikipedia.org/wiki/Mandelbrot_set) +* [Quine](https://en.wikipedia.org/wiki/Quine_(computing)) +* [Hello, World!](https://en.wikipedia.org/wiki/%22Hello,_World!%22_program) diff --git a/examples/api_discovery/convert_ns_to_caml.amlg b/examples/api_discovery/convert_ns_to_caml.amlg new file mode 100644 index 00000000..931cf1b5 --- /dev/null +++ b/examples/api_discovery/convert_ns_to_caml.amlg @@ -0,0 +1,10 @@ +; +;converts number_services.amlg into a caml file +; +(seq + (load_entity "number_services.amlg" "ns") + + (store + (concat "./number_services.caml") (retrieve_entity_root "ns" 1) + ) +) \ No newline at end of file diff --git a/examples/api_discovery/number_services.amlg b/examples/api_discovery/number_services.amlg new file mode 100644 index 00000000..b52f3f41 --- /dev/null +++ b/examples/api_discovery/number_services.amlg @@ -0,0 +1,103 @@ +; +;a simple Amalgam API with a demo for API spec generation using "get_api" +; +(null + + ;concatenated version + #version (get (load "version.json") "version") + + ;major version + #major_version 0 + + ;minor version + #minor_version 1 + + ;patch version + #patch_version 0 + + ;returns a structure containing all of the API details for this module + #get_api + (seq + (assoc + "description" + (get_entity_comments) + + "labels" + (map (lambda + (assoc + "description" + (target_value 1) + "parameters" + (get_entity_comments (null) (target_index 1) (true)) + ) + ) + (get_entity_comments (null) (null) (true)) + ) + ) + ) + + + ;returns true if number is even, false if it is not + ;works for infinity, -infinity + ;does not yet support negative numbers -- if a negative number is passed in, + ; it returns a string indicating lack of support + ;if nan is passed in, it will return a list of true and false to represent + ; the superposition + ;please avoid passing in the number 42... + #is_even + (declare (assoc + ; number to be passed in. if none is passed in, defaults to 0 + number 0 + ) + + (if + (= number .infinity) + (true) + + (= number -.infinity) + (true) + + (= number 42) + (assoc "a" (list .nan 3 (null) (list) (assoc "x" 12) .infinity) ) + + (= number .nan) + (list (true) (false)) + + (< number 0) + "Negative numbers are not yet supported." + + (!= (get_type_string number) "number") + "That's not a number." + + (if (mod number 2) (false) (true)) + ) + ) + + ;given the list of numbers, returns an associative array with each key + ; being the number and the value being the result of is_even + #are_even + (declare (assoc + ;list of numbers + numbers (list) + ) + + (zip numbers (map (lambda (call is_even (assoc number (target_value 1)))) numbers) ) + + ) + + ;given a and b, adds the numbers and returns the result of is_even + ; if either a or b is null, then it will return null + #is_sum_even + (declare (assoc + ;the first number. also, here's a test of different utf-8 and special characters: ! ; < > & ^ اَلْعَرَبِيَّةُ 日本語 -- ` ~ ‽ ü " " ' ' '' \n\r\n\r\n + ; and a second line! 
+ a (null) + ;the second number + b (null) + ) + (if (or (= a (null)) (= b (null)) ) + (null) + (call is_even (assoc number (+ a b))) + ) + ) +) diff --git a/examples/api_discovery/number_services_test.amlg b/examples/api_discovery/number_services_test.amlg new file mode 100644 index 00000000..ff98fb73 --- /dev/null +++ b/examples/api_discovery/number_services_test.amlg @@ -0,0 +1,25 @@ +; +;tests for: number_services.amlg +; +(seq + (load_entity "number_services.amlg" "ns") + (set_entity_root_permission "ns" (true)) ; ns entity needs root access to open version file + + (print (call_entity "ns" "get_api") "\n") + (print (call_entity "ns" "version") "\n") + (print (call_entity "ns" "major_version") "\n") + + (print (call_entity "ns" "is_even" (assoc number 1)) "\n") + (print (call_entity "ns" "is_even" (assoc number 2)) "\n") + (print (call_entity "ns" "is_even" (assoc number .infinity)) "\n") + (print (call_entity "ns" "is_even" (assoc number -.infinity)) "\n") + (print (call_entity "ns" "is_even" (assoc number .nan)) "\n") + (print (call_entity "ns" "is_even" (assoc number -1)) "\n") + (print (call_entity "ns" "is_even" (assoc number "blue")) "\n") + (print (call_entity "ns" "is_even" (assoc number 42)) "\n") + + (print (call_entity "ns" "are_even" (assoc numbers (list 1 2 3 "0" .nan 4 4.5))) "\n") + + (print (call_entity "ns" "is_sum_even" (assoc a 1 b 3)) "\n") + (print (call_entity "ns" "is_sum_even" (assoc a 1)) "\n") +) diff --git a/examples/api_discovery/version.json b/examples/api_discovery/version.json new file mode 100644 index 00000000..dfba51cd --- /dev/null +++ b/examples/api_discovery/version.json @@ -0,0 +1,3 @@ +{ + "version": "0.1.0" +} \ No newline at end of file diff --git a/examples/conways_game_of_life/game_of_life.amlg b/examples/conways_game_of_life/game_of_life.amlg new file mode 100644 index 00000000..2aa156ef --- /dev/null +++ b/examples/conways_game_of_life/game_of_life.amlg @@ -0,0 +1,167 @@ +; +; Conway's Game of Life: https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life +; +; Note: scale your terminal to get full effect +; + +(seq + + ; 80x80 wrap-around map + (declare (assoc edge_index 79)) + + ;init the 80x80 grid to be all 0s + (declare (assoc + board_map (range (lambda (range (lambda 0) 0 edge_index 1) ) 0 edge_index 1) + done (null) + )) + + ;create some initial 'life' + ;glider +    (assign (assoc +        board_map +            (set board_map +                (list 0 4) 1 +                (list 1 4) 1 +                (list 2 4) 1 +                (list 2 3) 1 +                (list 1 2) 1 +            ) +    )) + +    ;oscillator +    (assign (assoc +        board_map +            (set board_map +                (list 30 44) 1 +                (list 31 44) 1 +                (list 32 44) 1 +            ) +    )) + +    ;still +    (assign (assoc +        board_map +            (set board_map +                (list 70 24) 1 +                (list 71 23) 1 +                (list 71 25) 1 +                (list 72 24) 1 +            ) +    )) + +    ;acorn methuselah +    (assign (assoc +        board_map +            (set board_map +                (list 50 15) 1 +                (list 52 14) 1 +                (list 52 15) 1 +                (list 51 17) 1 +                (list 52 18) 1 +                (list 52 19) 1 +                (list 52 20) 1 +            ) +    )) + + ;any live cell with two or three live neighbours survives. + ;any dead cell with three live neighbours becomes a live cell. + ;all other live cells die in the next generation.
Similarly, all other dead cells stay dead. + (null + #CountNeighbors + (declare + (assoc + row_index 0 + col_index 0 + ) + + (declare (assoc + left_index (- col_index 1) + right_index (+ col_index 1) + top_index (- row_index 1) + bottom_index (+ row_index 1) + )) + + (if (< left_index 0) (assign (assoc left_index edge_index)) ) + (if (< top_index 0) (assign (assoc top_index edge_index)) ) + (if (> right_index edge_index) (assign (assoc right_index 0)) ) + (if (> bottom_index edge_index) (assign (assoc bottom_index 0)) ) + + ;count how many of the 8 neighbors are alive + ( + + (get board_map (list top_index left_index)) + (get board_map (list top_index col_index)) + (get board_map (list top_index right_index)) + (get board_map (list row_index left_index)) + (get board_map (list row_index right_index)) + (get board_map (list bottom_index left_index)) + (get board_map (list bottom_index col_index)) + (get board_map (list bottom_index right_index)) + ) + ) + ) + + (while (!= "q" done) + + (map + (lambda (seq + (map + (lambda (print (if (target_value) " X" " -") ) ) + (target_value) + ) + (print "\n") + )) + board_map + ) + + (assign (assoc + board_map + ||(map + (lambda (let + (assoc + row_index (target_index 1) + row (target_value 1) + ) + + (map + (lambda (let + (assoc + col_index (target_index 1) + cell (target_value 1) + num_neighbors 0 + ) + + (assign (assoc + num_neighbors + (call CountNeighbors (assoc + row_index row_index + col_index col_index + )) + )) + + (if (= 0 cell) + ;dead cell with 3 neighbors becomes a live cell + (if (= 3 num_neighbors) + (assign (assoc cell 1)) + ) + + ;else live cell dies if neighbors aren't 2 or 3 + (if (or (> num_neighbors 3) (< num_neighbors 2)) + (assign (assoc cell 0)) + ) + ) + + cell + )) + row + ) + + )) + board_map + ) + )) + + (print "enter for next tick, q to quit: ") + (assign (assoc done (system "readline") )) + ) + +) diff --git a/examples/fractals/mandelbrot_set.amlg b/examples/fractals/mandelbrot_set.amlg new file mode 100644 index 00000000..df7f8b8c --- /dev/null +++ b/examples/fractals/mandelbrot_set.amlg @@ -0,0 +1,107 @@ +; +; Mandelbrot set +; adapted from example https://medium.com/swlh/visualizing-the-mandelbrot-set-using-python-50-lines-f6aa5a05cf0f +; +(seq + + (declare (assoc + width 100 + x -0.65 + y 0 + x_range 3.4 + aspect_ratio (/ 5 3) + ;y_range = x_range / aspect_ratio + y_range (/ x_range (/ 5 3)) + precision 500 + )) + + (declare (assoc + ;init board to 0s + canvas_map (range (lambda (range (lambda 0) 1 width 1) ) 1 (/ width aspect_ratio) 1) + + height (/ width aspect_ratio) + min_x (- x (/ x_range 2)) + max_x (+ x (/ x_range 2)) + min_y (- y (/ y_range 2)) + max_y (+ y (/ y_range 2)) + )) + + (assign (assoc + canvas_map + (map + (lambda (let + (assoc + row (target_index 1) + row_pixels (target_value 1) + ) + + (map + (lambda (let + (assoc + col (target_index 1) + pixel 0 + old_x 0 + old_y 0 + i 0 + ) + (assign (assoc + x (+ min_x (* col (/ x_range width)) ) + y (- max_y (* row (/ y_range height)) ) + )) + (assign (assoc + old_x x + old_y y + )) + + (while + (and + (< i (+ 1 precision)) + (<= (+ (* x x) (* y y)) 4) + ) + (let + (assoc + ;real component of z^2 + a (- (* x x) (* y y)) + ;imaginary component of z^2 + b (* 2 x y) + ) + (assign (assoc + ;real component of new z + x (+ a old_x) + ;imaginary component of new z + y (+ b old_y) + i (+ 1 i) + )) + ) + ) + + (if (< i precision) + (assign (assoc + pixel (pow (/ (+ i 1) (+ precision 1)) 0.2) + )) + ) + + pixel + + )) + row_pixels + ) + + )) + canvas_map + ) + )) + + 
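;the values in canvas_map are now 0 for points treated as inside the set (those that did not escape before precision iterations) and, for points that escaped, a brightness of ((i + 1) / (precision + 1)) ^ 0.2 that grows with the number of iterations completed before |z| exceeded 2 + +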
;display the board + (map + (lambda (seq + (map + (lambda (print (if (target_value) " X" " -") ) ) + (target_value) + ) + (print "\n") + )) + canvas_map + ) + +) \ No newline at end of file diff --git a/examples/hello_world/hello_world.amlg b/examples/hello_world/hello_world.amlg new file mode 100644 index 00000000..b0cb6917 --- /dev/null +++ b/examples/hello_world/hello_world.amlg @@ -0,0 +1 @@ +(print "hello, world!") \ No newline at end of file diff --git a/examples/json_search/json-combine.amlg b/examples/json_search/json-combine.amlg new file mode 100644 index 00000000..6361387f --- /dev/null +++ b/examples/json_search/json-combine.amlg @@ -0,0 +1,21 @@ +#!./amalgam-mt +(seq + (declare (assoc + file_a (get argv 1) + file_b (get argv 2) + )) + + (if (or (not file_a) (not file_b)) + (seq + (print "Given two json files, stochastically combines them and prints the result.\nUsage: json-combine.amlg json_file1.json json_file2.json\n") + (system "exit") + ) + ) + + (declare (assoc + source_a (load file_a) + source_b (load file_b) + )) + + (print (format (mix source_a source_b) "code" "json")) +) \ No newline at end of file diff --git a/examples/json_search/json-search.amlg b/examples/json_search/json-search.amlg new file mode 100644 index 00000000..18e9b4a3 --- /dev/null +++ b/examples/json_search/json-search.amlg @@ -0,0 +1,56 @@ +#!amalgam-mt +(seq + ;find all json files from cwd + (declare (assoc + json_files + (if (= (system "os") "Windows") + (split (last (system "system" "dir \"*.json\" /S /B")) "\n") + (split (last (system "system" "find . -name \"*.json\"")) "\n") + ) + )) + + (declare (assoc source_file (get argv 1))) + (if (not source_file) + (seq + (print "Given a json file, lists the most similar json files anywhere within the cwd.\nUsage: json-search.amlg json_file.json\n") + (system "exit") + ) + ) + (declare (assoc source_data (load source_file))) + + (declare (assoc + edit_dist + (map (lambda + (let (assoc + cur_file (concat (target_value 1)) + cur_data (load (target_value 1)) + ) + + (list cur_file (edit_distance source_data cur_data)) + ) + ) + json_files + ) + )) + + ;sort by edit distance + (assign (assoc edit_dist + (sort (lambda (- (get (target_value) 1) (get (target_value 1) 1) )) edit_dist) + )) + + (print "*** exact matches:\n") + (map (lambda + (print (get (target_value) 0) "\n") + ) + + (filter (lambda (= (get (target_value) 1) 0)) edit_dist) + ) + + (print "*** best nonexact matches:\n") + (map (lambda + (print (get (target_value) 0) "\n") + ) + + (trunc (filter (lambda (> (get (target_value) 1) 0)) edit_dist) 5) + ) +) \ No newline at end of file diff --git a/examples/json_search/small1.json b/examples/json_search/small1.json new file mode 100644 index 00000000..20bf15c1 --- /dev/null +++ b/examples/json_search/small1.json @@ -0,0 +1,6 @@ +{ + "a" : 3, + "b" : 4, + "c" : 5, + "f" : 2 +} diff --git a/examples/json_search/small2.json b/examples/json_search/small2.json new file mode 100644 index 00000000..f14e8853 --- /dev/null +++ b/examples/json_search/small2.json @@ -0,0 +1,6 @@ +{ + "a" : 3, + "b" : 5, + "c" : 5, + "d" : 7 +} diff --git a/examples/json_search/small3.json b/examples/json_search/small3.json new file mode 100644 index 00000000..946694d0 --- /dev/null +++ b/examples/json_search/small3.json @@ -0,0 +1,6 @@ +{ + "a" : 1, + "b" : 4, + "c" : 5, + "e" : 7 +} diff --git a/examples/quine/quine.amlg b/examples/quine/quine.amlg new file mode 100644 index 00000000..c14731a7 --- /dev/null +++ b/examples/quine/quine.amlg @@ -0,0 +1 @@ +(print
(retrieve_entity_root)) \ No newline at end of file diff --git a/open-in-vs.bat b/open-in-vs.bat new file mode 100644 index 00000000..0da0a073 --- /dev/null +++ b/open-in-vs.bat @@ -0,0 +1,62 @@ +@echo off +REM +REM Open Amalgam in VS IDE based off first script parameter: +REM +REM 1) Default (no args) : Visual Studio solution (CMake generated, "amd64-windows-vs" preset) +REM 2) vs_cmake : Visual Studio directory (load from directory with CMake file) +REM 3) vscode : VSCode directory (load from directory with CMake file) +REM 4) vs_static : Visual Studio solution (local static non-CMake generated: Amalgam.sln) +REM + +echo Setting up build tools... +set VSWHERE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" +for /F "tokens=*" %%g in ('%VSWHERE% -latest -property installationPath') do (set VS_INSTALL_PATH=%%g) +echo VS/BuildTools install path: %VS_INSTALL_PATH% +call "%VS_INSTALL_PATH%\VC\Auxiliary\Build\vcvars64.bat" + +if "%1"=="" ( + + if not exist "out/build/amd64-windows-vs" ( + + echo CMake configure+generate Visual Studio solution... + cmake -DUSE_OBJECT_LIBS=OFF --preset amd64-windows-vs + if %ERRORLEVEL% GEQ 1 exit /B 1 + + echo Fixing up generated Visual Studio projects... + PowerShell -NoProfile -ExecutionPolicy Bypass -Command "& 'build/powershell/Fixup-Generated-VisualStudio-Projects.ps1'" + if %ERRORLEVEL% GEQ 1 exit /B 1 + + ) else ( + echo CMake build dir already exists, not re-running CMake configure+generate + ) + + echo Opening generated Visual Studio solution... + cmake --open out/build/amd64-windows-vs + if %ERRORLEVEL% GEQ 1 exit /B 1 + +) else if "%1"=="vs_cmake" ( + + echo Opening Visual Studio... + devenv . + if %ERRORLEVEL% GEQ 1 exit /B 1 + +) else if "%1"=="vscode" ( + + echo Opening VSCode... + code . + if %ERRORLEVEL% GEQ 1 exit /B 1 + +) else if "%1"=="vs_static" ( + + echo Opening Visual Studio w/ non-CMake Amalgam sln... + devenv Amalgam.sln + if %ERRORLEVEL% GEQ 1 exit /B 1 + +) else ( + + echo Unknown arg for opening project: %1 + exit /b 1 + +) + +exit /b 0 \ No newline at end of file diff --git a/src/3rd_party/date/date.h b/src/3rd_party/date/date.h new file mode 100644 index 00000000..6960e8cd --- /dev/null +++ b/src/3rd_party/date/date.h @@ -0,0 +1,8234 @@ +#ifndef DATE_H +#define DATE_H + +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016, 2017 Howard Hinnant +// Copyright (c) 2016 Adrian Colomitchi +// Copyright (c) 2017 Florian Dang +// Copyright (c) 2017 Paul Thompson +// Copyright (c) 2018, 2019 Tomasz Kamiński +// Copyright (c) 2019 Jiangang Zhuang +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. + +#ifndef HAS_STRING_VIEW +# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +# define HAS_STRING_VIEW 1 +# else +# define HAS_STRING_VIEW 0 +# endif +#endif // HAS_STRING_VIEW + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if HAS_STRING_VIEW +# include +#endif +#include +#include + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 7) +# pragma GCC diagnostic ignored "-Wpedantic" +# endif +# if __GNUC__ < 5 + // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" +# endif +#endif + +#ifdef _MSC_VER +# pragma warning(push) +// warning C4127: conditional expression is constant +# pragma warning(disable : 4127) +#endif + +namespace date +{ + +//---------------+ +// Configuration | +//---------------+ + +#ifndef ONLY_C_LOCALE +# define ONLY_C_LOCALE 0 +#endif + +#if defined(_MSC_VER) && (!defined(__clang__) || (_MSC_VER < 1910)) +// MSVC +# ifndef _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING +# define _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING +# endif +# if _MSC_VER < 1910 +// before VS2017 +# define CONSTDATA const +# define CONSTCD11 +# define CONSTCD14 +# define NOEXCEPT _NOEXCEPT +# else +// VS2017 and later +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 constexpr +# define NOEXCEPT noexcept +# endif + +#elif defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5150 +// Oracle Developer Studio 12.6 and earlier +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 +# define NOEXCEPT noexcept + +#elif __cplusplus >= 201402 +// C++14 +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 constexpr +# define NOEXCEPT noexcept +#else +// C++11 +# define CONSTDATA constexpr const +# define CONSTCD11 constexpr +# define CONSTCD14 +# define NOEXCEPT noexcept +#endif + +#ifndef HAS_UNCAUGHT_EXCEPTIONS +# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +# define HAS_UNCAUGHT_EXCEPTIONS 1 +# else +# define HAS_UNCAUGHT_EXCEPTIONS 0 +# endif +#endif // HAS_UNCAUGHT_EXCEPTIONS + +#ifndef HAS_VOID_T +# if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +# define HAS_VOID_T 1 +# else +# define HAS_VOID_T 0 +# endif +#endif // HAS_VOID_T + +// Protect from Oracle sun macro +#ifdef sun +# undef sun +#endif + +// Work around for a NVCC compiler bug which causes it to fail +// to compile std::ratio_{multiply,divide} when used directly +// in the std::chrono::duration template instantiations below +namespace detail { +template +using ratio_multiply = decltype(std::ratio_multiply{}); + +template +using ratio_divide = decltype(std::ratio_divide{}); +} // namespace detail + +//-----------+ +// Interface | +//-----------+ + +// durations + +using days = 
std::chrono::duration + , std::chrono::hours::period>>; + +using weeks = std::chrono::duration + , days::period>>; + +using years = std::chrono::duration + , days::period>>; + +using months = std::chrono::duration + >>; + +// time_point + +template + using sys_time = std::chrono::time_point; + +using sys_days = sys_time; +using sys_seconds = sys_time; + +struct local_t {}; + +template + using local_time = std::chrono::time_point; + +using local_seconds = local_time; +using local_days = local_time; + +// types + +struct last_spec +{ + explicit last_spec() = default; +}; + +class day; +class month; +class year; + +class weekday; +class weekday_indexed; +class weekday_last; + +class month_day; +class month_day_last; +class month_weekday; +class month_weekday_last; + +class year_month; + +class year_month_day; +class year_month_day_last; +class year_month_weekday; +class year_month_weekday_last; + +// date composition operators + +CONSTCD11 year_month operator/(const year& y, const month& m) NOEXCEPT; +CONSTCD11 year_month operator/(const year& y, int m) NOEXCEPT; + +CONSTCD11 month_day operator/(const day& d, const month& m) NOEXCEPT; +CONSTCD11 month_day operator/(const day& d, int m) NOEXCEPT; +CONSTCD11 month_day operator/(const month& m, const day& d) NOEXCEPT; +CONSTCD11 month_day operator/(const month& m, int d) NOEXCEPT; +CONSTCD11 month_day operator/(int m, const day& d) NOEXCEPT; + +CONSTCD11 month_day_last operator/(const month& m, last_spec) NOEXCEPT; +CONSTCD11 month_day_last operator/(int m, last_spec) NOEXCEPT; +CONSTCD11 month_day_last operator/(last_spec, const month& m) NOEXCEPT; +CONSTCD11 month_day_last operator/(last_spec, int m) NOEXCEPT; + +CONSTCD11 month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT; +CONSTCD11 month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT; +CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT; +CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT; + +CONSTCD11 month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT; +CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT; + +CONSTCD11 year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT; +CONSTCD11 year_month_day operator/(const year_month& ym, int d) NOEXCEPT; +CONSTCD11 year_month_day operator/(const year& y, const month_day& md) NOEXCEPT; +CONSTCD11 year_month_day operator/(int y, const month_day& md) NOEXCEPT; +CONSTCD11 year_month_day operator/(const month_day& md, const year& y) NOEXCEPT; +CONSTCD11 year_month_day operator/(const month_day& md, int y) NOEXCEPT; + +CONSTCD11 + year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT; +CONSTCD11 + year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const year& y, const month_weekday& mwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(int y, const 
month_weekday& mwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const month_weekday& mwd, const year& y) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator/(const month_weekday& mwd, int y) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(int y, const month_weekday_last& mwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator/(const month_weekday_last& mwdl, int y) NOEXCEPT; + +// Detailed interface + +// day + +class day +{ + unsigned char d_; + +public: + day() = default; + explicit CONSTCD11 day(unsigned d) NOEXCEPT; + + CONSTCD14 day& operator++() NOEXCEPT; + CONSTCD14 day operator++(int) NOEXCEPT; + CONSTCD14 day& operator--() NOEXCEPT; + CONSTCD14 day operator--(int) NOEXCEPT; + + CONSTCD14 day& operator+=(const days& d) NOEXCEPT; + CONSTCD14 day& operator-=(const days& d) NOEXCEPT; + + CONSTCD11 explicit operator unsigned() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator< (const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator> (const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const day& x, const day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const day& x, const day& y) NOEXCEPT; + +CONSTCD11 day operator+(const day& x, const days& y) NOEXCEPT; +CONSTCD11 day operator+(const days& x, const day& y) NOEXCEPT; +CONSTCD11 day operator-(const day& x, const days& y) NOEXCEPT; +CONSTCD11 days operator-(const day& x, const day& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const day& d); + +// month + +class month +{ + unsigned char m_; + +public: + month() = default; + explicit CONSTCD11 month(unsigned m) NOEXCEPT; + + CONSTCD14 month& operator++() NOEXCEPT; + CONSTCD14 month operator++(int) NOEXCEPT; + CONSTCD14 month& operator--() NOEXCEPT; + CONSTCD14 month operator--(int) NOEXCEPT; + + CONSTCD14 month& operator+=(const months& m) NOEXCEPT; + CONSTCD14 month& operator-=(const months& m) NOEXCEPT; + + CONSTCD11 explicit operator unsigned() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator< (const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator> (const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month& x, const month& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month& x, const month& y) NOEXCEPT; + +CONSTCD14 month operator+(const month& x, const months& y) NOEXCEPT; +CONSTCD14 month operator+(const months& x, const month& y) NOEXCEPT; +CONSTCD14 month operator-(const month& x, const months& y) NOEXCEPT; +CONSTCD14 months operator-(const month& x, const month& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month& m); + +// year + +class year +{ + short y_; + +public: + year() = default; + explicit CONSTCD11 year(int y) NOEXCEPT; + + CONSTCD14 year& operator++() NOEXCEPT; + CONSTCD14 year operator++(int) NOEXCEPT; + CONSTCD14 year& operator--() NOEXCEPT; + CONSTCD14 year 
operator--(int) NOEXCEPT; + + CONSTCD14 year& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 year operator-() const NOEXCEPT; + CONSTCD11 year operator+() const NOEXCEPT; + + CONSTCD11 bool is_leap() const NOEXCEPT; + + CONSTCD11 explicit operator int() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; + + static CONSTCD11 year min() NOEXCEPT { return year{-32767}; } + static CONSTCD11 year max() NOEXCEPT { return year{32767}; } +}; + +CONSTCD11 bool operator==(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator< (const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator> (const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year& x, const year& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year& x, const year& y) NOEXCEPT; + +CONSTCD11 year operator+(const year& x, const years& y) NOEXCEPT; +CONSTCD11 year operator+(const years& x, const year& y) NOEXCEPT; +CONSTCD11 year operator-(const year& x, const years& y) NOEXCEPT; +CONSTCD11 years operator-(const year& x, const year& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year& y); + +// weekday + +class weekday +{ + unsigned char wd_; +public: + weekday() = default; + explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT; + CONSTCD14 weekday(const sys_days& dp) NOEXCEPT; + CONSTCD14 explicit weekday(const local_days& dp) NOEXCEPT; + + CONSTCD14 weekday& operator++() NOEXCEPT; + CONSTCD14 weekday operator++(int) NOEXCEPT; + CONSTCD14 weekday& operator--() NOEXCEPT; + CONSTCD14 weekday operator--(int) NOEXCEPT; + + CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT; + CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; + + CONSTCD11 unsigned c_encoding() const NOEXCEPT; + CONSTCD11 unsigned iso_encoding() const NOEXCEPT; + + CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT; + CONSTCD11 weekday_last operator[](last_spec) const NOEXCEPT; + +private: + static CONSTCD14 unsigned char weekday_from_days(int z) NOEXCEPT; + + friend CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT; + friend CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT; + friend CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT; + template + friend std::basic_ostream& + operator<<(std::basic_ostream& os, const weekday& wd); + friend class weekday_indexed; +}; + +CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT; +CONSTCD11 bool operator!=(const weekday& x, const weekday& y) NOEXCEPT; + +CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT; +CONSTCD14 weekday operator+(const days& x, const weekday& y) NOEXCEPT; +CONSTCD14 weekday operator-(const weekday& x, const days& y) NOEXCEPT; +CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday& wd); + +// weekday_indexed + +class weekday_indexed +{ + unsigned char wd_ : 4; + unsigned char index_ : 4; + +public: + weekday_indexed() = default; + CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT; + + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 unsigned index() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT; +CONSTCD11 bool 
operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_indexed& wdi); + +// weekday_last + +class weekday_last +{ + date::weekday wd_; + +public: + explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT; + + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT; +CONSTCD11 bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_last& wdl); + +namespace detail +{ + +struct unspecified_month_disambiguator {}; + +} // namespace detail + +// year_month + +class year_month +{ + date::year y_; + date::month m_; + +public: + year_month() = default; + CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + + template + CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT; + template + CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT; + CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT; + CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator< (const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator> (const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year_month& x, const year_month& y) NOEXCEPT; + +template +CONSTCD14 year_month operator+(const year_month& ym, const months& dm) NOEXCEPT; +template +CONSTCD14 year_month operator+(const months& dm, const year_month& ym) NOEXCEPT; +template +CONSTCD14 year_month operator-(const year_month& ym, const months& dm) NOEXCEPT; + +CONSTCD11 months operator-(const year_month& x, const year_month& y) NOEXCEPT; +CONSTCD11 year_month operator+(const year_month& ym, const years& dy) NOEXCEPT; +CONSTCD11 year_month operator+(const years& dy, const year_month& ym) NOEXCEPT; +CONSTCD11 year_month operator-(const year_month& ym, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month& ym); + +// month_day + +class month_day +{ + date::month m_; + date::day d_; + +public: + month_day() = default; + CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::day day() const NOEXCEPT; + + CONSTCD14 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator< (const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator> (const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month_day& x, const month_day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month_day& x, const month_day& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day& md); + +// month_day_last + +class month_day_last +{ + date::month m_; + +public: + CONSTCD11 explicit month_day_last(const date::month& m) 
NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator< (const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator> (const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT; +CONSTCD11 bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day_last& mdl); + +// month_weekday + +class month_weekday +{ + date::month m_; + date::weekday_indexed wdi_; +public: + CONSTCD11 month_weekday(const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT; +CONSTCD11 bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday& mwd); + +// month_weekday_last + +class month_weekday_last +{ + date::month m_; + date::weekday_last wdl_; + +public: + CONSTCD11 month_weekday_last(const date::month& m, + const date::weekday_last& wd) NOEXCEPT; + + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT; + + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday_last& mwdl); + +// class year_month_day + +class year_month_day +{ + date::year y_; + date::month m_; + date::day d_; + +public: + year_month_day() = default; + CONSTCD11 year_month_day(const date::year& y, const date::month& m, + const date::day& d) NOEXCEPT; + CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT; + + CONSTCD14 year_month_day(sys_days dp) NOEXCEPT; + CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT; + + template + CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT; + template + CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_day& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::day day() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD14 bool ok() const NOEXCEPT; + +private: + static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT; + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator< (const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator> (const year_month_day& x, const year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator<=(const year_month_day& x, const 
year_month_day& y) NOEXCEPT; +CONSTCD11 bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT; + +template +CONSTCD14 year_month_day operator+(const year_month_day& ymd, const months& dm) NOEXCEPT; +template +CONSTCD14 year_month_day operator+(const months& dm, const year_month_day& ymd) NOEXCEPT; +template +CONSTCD14 year_month_day operator-(const year_month_day& ymd, const months& dm) NOEXCEPT; +CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT; +CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT; +CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day& ymd); + +// year_month_day_last + +class year_month_day_last +{ + date::year y_; + date::month_day_last mdl_; + +public: + CONSTCD11 year_month_day_last(const date::year& y, + const date::month_day_last& mdl) NOEXCEPT; + + template + CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT; + template + CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_day_last& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_day_last& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT; + CONSTCD14 date::day day() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator< (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator> (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; +CONSTCD11 + bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT; + +template +CONSTCD14 +year_month_day_last +operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT; + +template +CONSTCD14 +year_month_day_last +operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT; + +template +CONSTCD14 +year_month_day_last +operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_day_last +operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day_last& ymdl); + +// year_month_weekday + +class year_month_weekday +{ + date::year y_; + date::month m_; + date::weekday_indexed wdi_; + +public: + year_month_weekday() = default; + CONSTCD11 year_month_weekday(const date::year& y, const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT; + CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT; + CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT; + + template + CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT; + template + CONSTCD14 
year_month_weekday& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_weekday& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 unsigned index() const NOEXCEPT; + CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD14 bool ok() const NOEXCEPT; + +private: + static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT; + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 + bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT; +CONSTCD11 + bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday +operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday +operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday +operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_weekday +operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday& ymwdi); + +// year_month_weekday_last + +class year_month_weekday_last +{ + date::year y_; + date::month m_; + date::weekday_last wdl_; + +public: + CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m, + const date::weekday_last& wdl) NOEXCEPT; + + template + CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT; + template + CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT; + CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT; + CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT; + + CONSTCD11 date::year year() const NOEXCEPT; + CONSTCD11 date::month month() const NOEXCEPT; + CONSTCD11 date::weekday weekday() const NOEXCEPT; + CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT; + + CONSTCD14 operator sys_days() const NOEXCEPT; + CONSTCD14 explicit operator local_days() const NOEXCEPT; + CONSTCD11 bool ok() const NOEXCEPT; + +private: + CONSTCD14 days to_days() const NOEXCEPT; +}; + +CONSTCD11 +bool +operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT; + +CONSTCD11 +bool +operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday_last +operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT; + +template +CONSTCD14 +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT; + +CONSTCD11 +year_month_weekday_last +operator-(const 
year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday_last& ymwdl); + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +inline namespace literals +{ + +CONSTCD11 date::day operator "" _d(unsigned long long d) NOEXCEPT; +CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT; + +} // inline namespace literals +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +// CONSTDATA date::month January{1}; +// CONSTDATA date::month February{2}; +// CONSTDATA date::month March{3}; +// CONSTDATA date::month April{4}; +// CONSTDATA date::month May{5}; +// CONSTDATA date::month June{6}; +// CONSTDATA date::month July{7}; +// CONSTDATA date::month August{8}; +// CONSTDATA date::month September{9}; +// CONSTDATA date::month October{10}; +// CONSTDATA date::month November{11}; +// CONSTDATA date::month December{12}; +// +// CONSTDATA date::weekday Sunday{0u}; +// CONSTDATA date::weekday Monday{1u}; +// CONSTDATA date::weekday Tuesday{2u}; +// CONSTDATA date::weekday Wednesday{3u}; +// CONSTDATA date::weekday Thursday{4u}; +// CONSTDATA date::weekday Friday{5u}; +// CONSTDATA date::weekday Saturday{6u}; + +#if HAS_VOID_T + +template > +struct is_clock + : std::false_type +{}; + +template +struct is_clock> + : std::true_type +{}; + +template inline constexpr bool is_clock_v = is_clock::value; + +#endif // HAS_VOID_T + +//----------------+ +// Implementation | +//----------------+ + +// utilities +namespace detail { + +template> +class save_istream +{ +protected: + std::basic_ios& is_; + CharT fill_; + std::ios::fmtflags flags_; + std::streamsize precision_; + std::streamsize width_; + std::basic_ostream* tie_; + std::locale loc_; + +public: + ~save_istream() + { + is_.fill(fill_); + is_.flags(flags_); + is_.precision(precision_); + is_.width(width_); + is_.imbue(loc_); + is_.tie(tie_); + } + + save_istream(const save_istream&) = delete; + save_istream& operator=(const save_istream&) = delete; + + explicit save_istream(std::basic_ios& is) + : is_(is) + , fill_(is.fill()) + , flags_(is.flags()) + , precision_(is.precision()) + , width_(is.width(0)) + , tie_(is.tie(nullptr)) + , loc_(is.getloc()) + { + if (tie_ != nullptr) + tie_->flush(); + } +}; + +template> +class save_ostream + : private save_istream +{ +public: + ~save_ostream() + { + if ((this->flags_ & std::ios::unitbuf) && +#if HAS_UNCAUGHT_EXCEPTIONS + std::uncaught_exceptions() == 0 && +#else + !std::uncaught_exception() && +#endif + this->is_.good()) + this->is_.rdbuf()->pubsync(); + } + + save_ostream(const save_ostream&) = delete; + save_ostream& operator=(const save_ostream&) = delete; + + explicit save_ostream(std::basic_ios& os) + : save_istream(os) + { + } +}; + +template +struct choose_trunc_type +{ + static const int digits = std::numeric_limits::digits; + using type = typename std::conditional + < + digits < 32, + std::int32_t, + typename std::conditional + < + digits < 64, + std::int64_t, +#ifdef __SIZEOF_INT128__ + __int128 +#else + std::int64_t +#endif + >::type + >::type; +}; + +template +CONSTCD11 +inline +typename std::enable_if +< + !std::chrono::treat_as_floating_point::value, + T +>::type +trunc(T t) NOEXCEPT +{ + return t; +} + +template +CONSTCD14 +inline +typename std::enable_if +< + std::chrono::treat_as_floating_point::value, + T +>::type +trunc(T t) NOEXCEPT +{ + using std::numeric_limits; + using I = typename choose_trunc_type::type; + CONSTDATA auto digits = numeric_limits::digits; + static_assert(digits < 
numeric_limits::digits, ""); + CONSTDATA auto max = I{1} << (digits-1); + CONSTDATA auto min = -max; + const auto negative = t < T{0}; + if (min <= t && t <= max && t != 0 && t == t) + { + t = static_cast(static_cast(t)); + if (t == 0 && negative) + t = -t; + } + return t; +} + +template +struct static_gcd +{ + static const std::intmax_t value = static_gcd::value; +}; + +template +struct static_gcd +{ + static const std::intmax_t value = Xp; +}; + +template <> +struct static_gcd<0, 0> +{ + static const std::intmax_t value = 1; +}; + +template +struct no_overflow +{ +private: + static const std::intmax_t gcd_n1_n2 = static_gcd::value; + static const std::intmax_t gcd_d1_d2 = static_gcd::value; + static const std::intmax_t n1 = R1::num / gcd_n1_n2; + static const std::intmax_t d1 = R1::den / gcd_d1_d2; + static const std::intmax_t n2 = R2::num / gcd_n1_n2; + static const std::intmax_t d2 = R2::den / gcd_d1_d2; +#ifdef __cpp_constexpr + static const std::intmax_t max = std::numeric_limits::max(); +#else + static const std::intmax_t max = LLONG_MAX; +#endif + + template + struct mul // overflow == false + { + static const std::intmax_t value = Xp * Yp; + }; + + template + struct mul + { + static const std::intmax_t value = 1; + }; + +public: + static const bool value = (n1 <= max / d2) && (n2 <= max / d1); + typedef std::ratio::value, + mul::value> type; +}; + +} // detail + +// trunc towards zero +template +CONSTCD11 +inline +typename std::enable_if +< + detail::no_overflow::value, + To +>::type +trunc(const std::chrono::duration& d) +{ + return To{detail::trunc(std::chrono::duration_cast(d).count())}; +} + +template +CONSTCD11 +inline +typename std::enable_if +< + !detail::no_overflow::value, + To +>::type +trunc(const std::chrono::duration& d) +{ + using std::chrono::duration_cast; + using std::chrono::duration; + using rep = typename std::common_type::type; + return To{detail::trunc(duration_cast(duration_cast>(d)).count())}; +} + +#ifndef HAS_CHRONO_ROUNDING +# if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined (__clang__))) +# define HAS_CHRONO_ROUNDING 1 +# elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510 +# define HAS_CHRONO_ROUNDING 1 +# elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800 +# define HAS_CHRONO_ROUNDING 1 +# else +# define HAS_CHRONO_ROUNDING 0 +# endif +#endif // HAS_CHRONO_ROUNDING + +#if HAS_CHRONO_ROUNDING == 0 + +// round down +template +CONSTCD14 +inline +typename std::enable_if +< + detail::no_overflow::value, + To +>::type +floor(const std::chrono::duration& d) +{ + auto t = trunc(d); + if (t > d) + return t - To{1}; + return t; +} + +template +CONSTCD14 +inline +typename std::enable_if +< + !detail::no_overflow::value, + To +>::type +floor(const std::chrono::duration& d) +{ + using rep = typename std::common_type::type; + return floor(floor>(d)); +} + +// round to nearest, to even on tie +template +CONSTCD14 +inline +To +round(const std::chrono::duration& d) +{ + auto t0 = floor(d); + auto t1 = t0 + To{1}; + if (t1 == To{0} && t0 < To{0}) + t1 = -t1; + auto diff0 = d - t0; + auto diff1 = t1 - d; + if (diff0 == diff1) + { + if (t0 - trunc(t0/2)*2 == To{0}) + return t0; + return t1; + } + if (diff0 < diff1) + return t0; + return t1; +} + +// round up +template +CONSTCD14 +inline +To +ceil(const std::chrono::duration& d) +{ + auto t = trunc(d); + if (t < d) + return t + To{1}; + return t; +} + +template ::is_signed + >::type> +CONSTCD11 
+std::chrono::duration +abs(std::chrono::duration d) +{ + return d >= d.zero() ? d : static_cast(-d); +} + +// round down +template +CONSTCD11 +inline +std::chrono::time_point +floor(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{date::floor(tp.time_since_epoch())}; +} + +// round to nearest, to even on tie +template +CONSTCD11 +inline +std::chrono::time_point +round(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{round(tp.time_since_epoch())}; +} + +// round up +template +CONSTCD11 +inline +std::chrono::time_point +ceil(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{ceil(tp.time_since_epoch())}; +} + +#else // HAS_CHRONO_ROUNDING == 1 + +using std::chrono::floor; +using std::chrono::ceil; +using std::chrono::round; +using std::chrono::abs; + +#endif // HAS_CHRONO_ROUNDING + +namespace detail +{ + +template +CONSTCD14 +inline +typename std::enable_if +< + !std::chrono::treat_as_floating_point::value, + To +>::type +round_i(const std::chrono::duration& d) +{ + return round(d); +} + +template +CONSTCD14 +inline +typename std::enable_if +< + std::chrono::treat_as_floating_point::value, + To +>::type +round_i(const std::chrono::duration& d) +{ + return d; +} + +template +CONSTCD11 +inline +std::chrono::time_point +round_i(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{round_i(tp.time_since_epoch())}; +} + +} // detail + +// trunc towards zero +template +CONSTCD11 +inline +std::chrono::time_point +trunc(const std::chrono::time_point& tp) +{ + using std::chrono::time_point; + return time_point{trunc(tp.time_since_epoch())}; +} + +// day + +CONSTCD11 inline day::day(unsigned d) NOEXCEPT : d_(static_cast(d)) {} +CONSTCD14 inline day& day::operator++() NOEXCEPT {++d_; return *this;} +CONSTCD14 inline day day::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline day& day::operator--() NOEXCEPT {--d_; return *this;} +CONSTCD14 inline day day::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} +CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {*this = *this + d; return *this;} +CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {*this = *this - d; return *this;} +CONSTCD11 inline day::operator unsigned() const NOEXCEPT {return d_;} +CONSTCD11 inline bool day::ok() const NOEXCEPT {return 1 <= d_ && d_ <= 31;} + +CONSTCD11 +inline +bool +operator==(const day& x, const day& y) NOEXCEPT +{ + return static_cast(x) == static_cast(y); +} + +CONSTCD11 +inline +bool +operator!=(const day& x, const day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const day& x, const day& y) NOEXCEPT +{ + return static_cast(x) < static_cast(y); +} + +CONSTCD11 +inline +bool +operator>(const day& x, const day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const day& x, const day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const day& x, const day& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD11 +inline +days +operator-(const day& x, const day& y) NOEXCEPT +{ + return days{static_cast(static_cast(x) + - static_cast(y))}; +} + +CONSTCD11 +inline +day +operator+(const day& x, const days& y) NOEXCEPT +{ + return day{static_cast(x) + static_cast(y.count())}; +} + +CONSTCD11 +inline +day +operator+(const days& x, const day& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD11 +inline +day +operator-(const 
day& x, const days& y) NOEXCEPT +{ + return x + -y; +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const day& d) +{ + detail::save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(d); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const day& d) +{ + detail::low_level_fmt(os, d); + if (!d.ok()) + os << " is not a valid day"; + return os; +} + +// month + +CONSTCD11 inline month::month(unsigned m) NOEXCEPT : m_(static_cast(m)) {} +CONSTCD14 inline month& month::operator++() NOEXCEPT {*this += months{1}; return *this;} +CONSTCD14 inline month month::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline month& month::operator--() NOEXCEPT {*this -= months{1}; return *this;} +CONSTCD14 inline month month::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} + +CONSTCD14 +inline +month& +month::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +CONSTCD14 +inline +month& +month::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD11 inline month::operator unsigned() const NOEXCEPT {return m_;} +CONSTCD11 inline bool month::ok() const NOEXCEPT {return 1 <= m_ && m_ <= 12;} + +CONSTCD11 +inline +bool +operator==(const month& x, const month& y) NOEXCEPT +{ + return static_cast(x) == static_cast(y); +} + +CONSTCD11 +inline +bool +operator!=(const month& x, const month& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const month& x, const month& y) NOEXCEPT +{ + return static_cast(x) < static_cast(y); +} + +CONSTCD11 +inline +bool +operator>(const month& x, const month& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const month& x, const month& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const month& x, const month& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD14 +inline +months +operator-(const month& x, const month& y) NOEXCEPT +{ + auto const d = static_cast(x) - static_cast(y); + return months(d <= 11 ? d : d + 12); +} + +CONSTCD14 +inline +month +operator+(const month& x, const months& y) NOEXCEPT +{ + auto const mu = static_cast(static_cast(x)) + y.count() - 1; + auto const yr = (mu >= 0 ? 
mu : mu-11) / 12; + return month{static_cast(mu - yr * 12 + 1)}; +} + +CONSTCD14 +inline +month +operator+(const months& x, const month& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD14 +inline +month +operator-(const month& x, const months& y) NOEXCEPT +{ + return x + -y; +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month& m) +{ + if (m.ok()) + { + CharT fmt[] = {'%', 'b', 0}; + os << format(os.getloc(), fmt, m); + } + else + os << static_cast(m); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month& m) +{ + detail::low_level_fmt(os, m); + if (!m.ok()) + os << " is not a valid month"; + return os; +} + +// year + +CONSTCD11 inline year::year(int y) NOEXCEPT : y_(static_cast(y)) {} +CONSTCD14 inline year& year::operator++() NOEXCEPT {++y_; return *this;} +CONSTCD14 inline year year::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline year& year::operator--() NOEXCEPT {--y_; return *this;} +CONSTCD14 inline year year::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} +CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {*this = *this + y; return *this;} +CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {*this = *this - y; return *this;} +CONSTCD11 inline year year::operator-() const NOEXCEPT {return year{-y_};} +CONSTCD11 inline year year::operator+() const NOEXCEPT {return *this;} + +CONSTCD11 +inline +bool +year::is_leap() const NOEXCEPT +{ + return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0); +} + +CONSTCD11 inline year::operator int() const NOEXCEPT {return y_;} + +CONSTCD11 +inline +bool +year::ok() const NOEXCEPT +{ + return y_ != std::numeric_limits::min(); +} + +CONSTCD11 +inline +bool +operator==(const year& x, const year& y) NOEXCEPT +{ + return static_cast(x) == static_cast(y); +} + +CONSTCD11 +inline +bool +operator!=(const year& x, const year& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year& x, const year& y) NOEXCEPT +{ + return static_cast(x) < static_cast(y); +} + +CONSTCD11 +inline +bool +operator>(const year& x, const year& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year& x, const year& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year& x, const year& y) NOEXCEPT +{ + return !(x < y); +} + +CONSTCD11 +inline +years +operator-(const year& x, const year& y) NOEXCEPT +{ + return years{static_cast(x) - static_cast(y)}; +} + +CONSTCD11 +inline +year +operator+(const year& x, const years& y) NOEXCEPT +{ + return year{static_cast(x) + y.count()}; +} + +CONSTCD11 +inline +year +operator+(const years& x, const year& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD11 +inline +year +operator-(const year& x, const years& y) NOEXCEPT +{ + return year{static_cast(x) - y.count()}; +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const year& y) +{ + detail::save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::internal); + os.width(4 + (y < year{0})); + os.imbue(std::locale::classic()); + os << static_cast(y); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year& y) +{ + detail::low_level_fmt(os, y); + if (!y.ok()) + os << " is not a valid year"; + return os; +} + +// weekday + +CONSTCD14 +inline +unsigned char 
+weekday::weekday_from_days(int z) NOEXCEPT +{ + auto u = static_cast(z); + return static_cast(z >= -4 ? (u+4) % 7 : u % 7); +} + +CONSTCD11 +inline +weekday::weekday(unsigned wd) NOEXCEPT + : wd_(static_cast(wd != 7 ? wd : 0)) + {} + +CONSTCD14 +inline +weekday::weekday(const sys_days& dp) NOEXCEPT + : wd_(weekday_from_days(dp.time_since_epoch().count())) + {} + +CONSTCD14 +inline +weekday::weekday(const local_days& dp) NOEXCEPT + : wd_(weekday_from_days(dp.time_since_epoch().count())) + {} + +CONSTCD14 inline weekday& weekday::operator++() NOEXCEPT {*this += days{1}; return *this;} +CONSTCD14 inline weekday weekday::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;} +CONSTCD14 inline weekday& weekday::operator--() NOEXCEPT {*this -= days{1}; return *this;} +CONSTCD14 inline weekday weekday::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;} + +CONSTCD14 +inline +weekday& +weekday::operator+=(const days& d) NOEXCEPT +{ + *this = *this + d; + return *this; +} + +CONSTCD14 +inline +weekday& +weekday::operator-=(const days& d) NOEXCEPT +{ + *this = *this - d; + return *this; +} + +CONSTCD11 inline bool weekday::ok() const NOEXCEPT {return wd_ <= 6;} + +CONSTCD11 +inline +unsigned weekday::c_encoding() const NOEXCEPT +{ + return unsigned{wd_}; +} + +CONSTCD11 +inline +unsigned weekday::iso_encoding() const NOEXCEPT +{ + return unsigned{((wd_ == 0u) ? 7u : wd_)}; +} + +CONSTCD11 +inline +bool +operator==(const weekday& x, const weekday& y) NOEXCEPT +{ + return x.wd_ == y.wd_; +} + +CONSTCD11 +inline +bool +operator!=(const weekday& x, const weekday& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD14 +inline +days +operator-(const weekday& x, const weekday& y) NOEXCEPT +{ + auto const wdu = x.wd_ - y.wd_; + auto const wk = (wdu >= 0 ? wdu : wdu-6) / 7; + return days{wdu - wk * 7}; +} + +CONSTCD14 +inline +weekday +operator+(const weekday& x, const days& y) NOEXCEPT +{ + auto const wdu = static_cast(static_cast(x.wd_)) + y.count(); + auto const wk = (wdu >= 0 ? 
wdu : wdu-6) / 7; + return weekday{static_cast(wdu - wk * 7)}; +} + +CONSTCD14 +inline +weekday +operator+(const days& x, const weekday& y) NOEXCEPT +{ + return y + x; +} + +CONSTCD14 +inline +weekday +operator-(const weekday& x, const days& y) NOEXCEPT +{ + return x + -y; +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const weekday& wd) +{ + if (wd.ok()) + { + CharT fmt[] = {'%', 'a', 0}; + os << format(fmt, wd); + } + else + os << wd.c_encoding(); + return os; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday& wd) +{ + detail::low_level_fmt(os, wd); + if (!wd.ok()) + os << " is not a valid weekday"; + return os; +} + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +inline namespace literals +{ + +CONSTCD11 +inline +date::day +operator "" _d(unsigned long long d) NOEXCEPT +{ + return date::day{static_cast(d)}; +} + +CONSTCD11 +inline +date::year +operator "" _y(unsigned long long y) NOEXCEPT +{ + return date::year(static_cast(y)); +} +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +CONSTDATA date::last_spec last{}; + +CONSTDATA date::month jan{1}; +CONSTDATA date::month feb{2}; +CONSTDATA date::month mar{3}; +CONSTDATA date::month apr{4}; +CONSTDATA date::month may{5}; +CONSTDATA date::month jun{6}; +CONSTDATA date::month jul{7}; +CONSTDATA date::month aug{8}; +CONSTDATA date::month sep{9}; +CONSTDATA date::month oct{10}; +CONSTDATA date::month nov{11}; +CONSTDATA date::month dec{12}; + +CONSTDATA date::weekday sun{0u}; +CONSTDATA date::weekday mon{1u}; +CONSTDATA date::weekday tue{2u}; +CONSTDATA date::weekday wed{3u}; +CONSTDATA date::weekday thu{4u}; +CONSTDATA date::weekday fri{5u}; +CONSTDATA date::weekday sat{6u}; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +} // inline namespace literals +#endif + +CONSTDATA date::month January{1}; +CONSTDATA date::month February{2}; +CONSTDATA date::month March{3}; +CONSTDATA date::month April{4}; +CONSTDATA date::month May{5}; +CONSTDATA date::month June{6}; +CONSTDATA date::month July{7}; +CONSTDATA date::month August{8}; +CONSTDATA date::month September{9}; +CONSTDATA date::month October{10}; +CONSTDATA date::month November{11}; +CONSTDATA date::month December{12}; + +CONSTDATA date::weekday Monday{1}; +CONSTDATA date::weekday Tuesday{2}; +CONSTDATA date::weekday Wednesday{3}; +CONSTDATA date::weekday Thursday{4}; +CONSTDATA date::weekday Friday{5}; +CONSTDATA date::weekday Saturday{6}; +CONSTDATA date::weekday Sunday{7}; + +// weekday_indexed + +CONSTCD11 +inline +weekday +weekday_indexed::weekday() const NOEXCEPT +{ + return date::weekday{static_cast(wd_)}; +} + +CONSTCD11 inline unsigned weekday_indexed::index() const NOEXCEPT {return index_;} + +CONSTCD11 +inline +bool +weekday_indexed::ok() const NOEXCEPT +{ + return weekday().ok() && 1 <= index_ && index_ <= 5; +} + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wconversion" +#endif // __GNUC__ + +CONSTCD11 +inline +weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT + : wd_(static_cast(static_cast(wd.wd_))) + , index_(static_cast(index)) + {} + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif // __GNUC__ + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const weekday_indexed& wdi) +{ + return low_level_fmt(os, wdi.weekday()) << '[' << wdi.index() << ']'; +} + +} // namespace detail + +template +inline +std::basic_ostream& 
+operator<<(std::basic_ostream& os, const weekday_indexed& wdi) +{ + detail::low_level_fmt(os, wdi); + if (!wdi.ok()) + os << " is not a valid weekday_indexed"; + return os; +} + +CONSTCD11 +inline +weekday_indexed +weekday::operator[](unsigned index) const NOEXCEPT +{ + return {*this, index}; +} + +CONSTCD11 +inline +bool +operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT +{ + return x.weekday() == y.weekday() && x.index() == y.index(); +} + +CONSTCD11 +inline +bool +operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT +{ + return !(x == y); +} + +// weekday_last + +CONSTCD11 inline date::weekday weekday_last::weekday() const NOEXCEPT {return wd_;} +CONSTCD11 inline bool weekday_last::ok() const NOEXCEPT {return wd_.ok();} +CONSTCD11 inline weekday_last::weekday_last(const date::weekday& wd) NOEXCEPT : wd_(wd) {} + +CONSTCD11 +inline +bool +operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT +{ + return x.weekday() == y.weekday(); +} + +CONSTCD11 +inline +bool +operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const weekday_last& wdl) +{ + return low_level_fmt(os, wdl.weekday()) << "[last]"; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const weekday_last& wdl) +{ + detail::low_level_fmt(os, wdl); + if (!wdl.ok()) + os << " is not a valid weekday_last"; + return os; +} + +CONSTCD11 +inline +weekday_last +weekday::operator[](last_spec) const NOEXCEPT +{ + return weekday_last{*this}; +} + +// year_month + +CONSTCD11 +inline +year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT + : y_(y) + , m_(m) + {} + +CONSTCD11 inline year year_month::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month::month() const NOEXCEPT {return m_;} +CONSTCD11 inline bool year_month::ok() const NOEXCEPT {return y_.ok() && m_.ok();} + +template +CONSTCD14 +inline +year_month& +year_month::operator+=(const months& dm) NOEXCEPT +{ + *this = *this + dm; + return *this; +} + +template +CONSTCD14 +inline +year_month& +year_month::operator-=(const months& dm) NOEXCEPT +{ + *this = *this - dm; + return *this; +} + +CONSTCD14 +inline +year_month& +year_month::operator+=(const years& dy) NOEXCEPT +{ + *this = *this + dy; + return *this; +} + +CONSTCD14 +inline +year_month& +year_month::operator-=(const years& dy) NOEXCEPT +{ + *this = *this - dy; + return *this; +} + +CONSTCD11 +inline +bool +operator==(const year_month& x, const year_month& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month& x, const year_month& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? 
false + : (x.month() < y.month())); +} + +CONSTCD11 +inline +bool +operator>(const year_month& x, const year_month& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month& x, const year_month& y) NOEXCEPT +{ + return !(x < y); +} + +template +CONSTCD14 +inline +year_month +operator+(const year_month& ym, const months& dm) NOEXCEPT +{ + auto dmi = static_cast(static_cast(ym.month())) - 1 + dm.count(); + auto dy = (dmi >= 0 ? dmi : dmi-11) / 12; + dmi = dmi - dy * 12 + 1; + return (ym.year() + years(dy)) / month(static_cast(dmi)); +} + +template +CONSTCD14 +inline +year_month +operator+(const months& dm, const year_month& ym) NOEXCEPT +{ + return ym + dm; +} + +template +CONSTCD14 +inline +year_month +operator-(const year_month& ym, const months& dm) NOEXCEPT +{ + return ym + -dm; +} + +CONSTCD11 +inline +months +operator-(const year_month& x, const year_month& y) NOEXCEPT +{ + return (x.year() - y.year()) + + months(static_cast(x.month()) - static_cast(y.month())); +} + +CONSTCD11 +inline +year_month +operator+(const year_month& ym, const years& dy) NOEXCEPT +{ + return (ym.year() + dy) / ym.month(); +} + +CONSTCD11 +inline +year_month +operator+(const years& dy, const year_month& ym) NOEXCEPT +{ + return ym + dy; +} + +CONSTCD11 +inline +year_month +operator-(const year_month& ym, const years& dy) NOEXCEPT +{ + return ym + -dy; +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const year_month& ym) +{ + low_level_fmt(os, ym.year()) << '/'; + return low_level_fmt(os, ym.month()); +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month& ym) +{ + detail::low_level_fmt(os, ym); + if (!ym.ok()) + os << " is not a valid year_month"; + return os; +} + +// month_day + +CONSTCD11 +inline +month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT + : m_(m) + , d_(d) + {} + +CONSTCD11 inline date::month month_day::month() const NOEXCEPT {return m_;} +CONSTCD11 inline date::day month_day::day() const NOEXCEPT {return d_;} + +CONSTCD14 +inline +bool +month_day::ok() const NOEXCEPT +{ + CONSTDATA date::day d[] = + { + date::day(31), date::day(29), date::day(31), + date::day(30), date::day(31), date::day(30), + date::day(31), date::day(31), date::day(30), + date::day(31), date::day(30), date::day(31) + }; + return m_.ok() && date::day{1} <= d_ && d_ <= d[static_cast(m_)-1]; +} + +CONSTCD11 +inline +bool +operator==(const month_day& x, const month_day& y) NOEXCEPT +{ + return x.month() == y.month() && x.day() == y.day(); +} + +CONSTCD11 +inline +bool +operator!=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const month_day& x, const month_day& y) NOEXCEPT +{ + return x.month() < y.month() ? true + : (x.month() > y.month() ? 
false + : (x.day() < y.day())); +} + +CONSTCD11 +inline +bool +operator>(const month_day& x, const month_day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const month_day& x, const month_day& y) NOEXCEPT +{ + return !(x < y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_day& md) +{ + low_level_fmt(os, md.month()) << '/'; + return low_level_fmt(os, md.day()); +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day& md) +{ + detail::low_level_fmt(os, md); + if (!md.ok()) + os << " is not a valid month_day"; + return os; +} + +// month_day_last + +CONSTCD11 inline month month_day_last::month() const NOEXCEPT {return m_;} +CONSTCD11 inline bool month_day_last::ok() const NOEXCEPT {return m_.ok();} +CONSTCD11 inline month_day_last::month_day_last(const date::month& m) NOEXCEPT : m_(m) {} + +CONSTCD11 +inline +bool +operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return x.month() == y.month(); +} + +CONSTCD11 +inline +bool +operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return x.month() < y.month(); +} + +CONSTCD11 +inline +bool +operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT +{ + return !(x < y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_day_last& mdl) +{ + return low_level_fmt(os, mdl.month()) << "/last"; +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_day_last& mdl) +{ + detail::low_level_fmt(os, mdl); + if (!mdl.ok()) + os << " is not a valid month_day_last"; + return os; +} + +// month_weekday + +CONSTCD11 +inline +month_weekday::month_weekday(const date::month& m, + const date::weekday_indexed& wdi) NOEXCEPT + : m_(m) + , wdi_(wdi) + {} + +CONSTCD11 inline month month_weekday::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday_indexed +month_weekday::weekday_indexed() const NOEXCEPT +{ + return wdi_; +} + +CONSTCD11 +inline +bool +month_weekday::ok() const NOEXCEPT +{ + return m_.ok() && wdi_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT +{ + return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed(); +} + +CONSTCD11 +inline +bool +operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT +{ + return !(x == y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_weekday& mwd) +{ + low_level_fmt(os, mwd.month()) << '/'; + return low_level_fmt(os, mwd.weekday_indexed()); +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday& mwd) +{ + detail::low_level_fmt(os, mwd); + if (!mwd.ok()) + os << " is not a valid month_weekday"; + return os; +} + +// month_weekday_last + +CONSTCD11 +inline +month_weekday_last::month_weekday_last(const 
date::month& m, + const date::weekday_last& wdl) NOEXCEPT + : m_(m) + , wdl_(wdl) + {} + +CONSTCD11 inline month month_weekday_last::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday_last +month_weekday_last::weekday_last() const NOEXCEPT +{ + return wdl_; +} + +CONSTCD11 +inline +bool +month_weekday_last::ok() const NOEXCEPT +{ + return m_.ok() && wdl_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT +{ + return x.month() == y.month() && x.weekday_last() == y.weekday_last(); +} + +CONSTCD11 +inline +bool +operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const month_weekday_last& mwdl) +{ + low_level_fmt(os, mwdl.month()) << '/'; + return low_level_fmt(os, mwdl.weekday_last()); +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const month_weekday_last& mwdl) +{ + detail::low_level_fmt(os, mwdl); + if (!mwdl.ok()) + os << " is not a valid month_weekday_last"; + return os; +} + +// year_month_day_last + +CONSTCD11 +inline +year_month_day_last::year_month_day_last(const date::year& y, + const date::month_day_last& mdl) NOEXCEPT + : y_(y) + , mdl_(mdl) + {} + +template +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +template +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_day_last& +year_month_day_last::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_day_last::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_day_last::month() const NOEXCEPT {return mdl_.month();} + +CONSTCD11 +inline +month_day_last +year_month_day_last::month_day_last() const NOEXCEPT +{ + return mdl_; +} + +CONSTCD14 +inline +day +year_month_day_last::day() const NOEXCEPT +{ + CONSTDATA date::day d[] = + { + date::day(31), date::day(28), date::day(31), + date::day(30), date::day(31), date::day(30), + date::day(31), date::day(31), date::day(30), + date::day(31), date::day(30), date::day(31) + }; + return (month() != February || !y_.is_leap()) && mdl_.ok() ? + d[static_cast(month()) - 1] : date::day{29}; +} + +CONSTCD14 +inline +year_month_day_last::operator sys_days() const NOEXCEPT +{ + return sys_days(year()/month()/day()); +} + +CONSTCD14 +inline +year_month_day_last::operator local_days() const NOEXCEPT +{ + return local_days(year()/month()/day()); +} + +CONSTCD11 +inline +bool +year_month_day_last::ok() const NOEXCEPT +{ + return y_.ok() && mdl_.ok(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return x.year() == y.year() && x.month_day_last() == y.month_day_last(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? 
false + : (x.month_day_last() < y.month_day_last())); +} + +CONSTCD11 +inline +bool +operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT +{ + return !(x < y); +} + +namespace detail +{ + +template +std::basic_ostream& +low_level_fmt(std::basic_ostream& os, const year_month_day_last& ymdl) +{ + low_level_fmt(os, ymdl.year()) << '/'; + return low_level_fmt(os, ymdl.month_day_last()); +} + +} // namespace detail + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day_last& ymdl) +{ + detail::low_level_fmt(os, ymdl); + if (!ymdl.ok()) + os << " is not a valid year_month_day_last"; + return os; +} + +template +CONSTCD14 +inline +year_month_day_last +operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT +{ + return (ymdl.year() / ymdl.month() + dm) / last; +} + +template +CONSTCD14 +inline +year_month_day_last +operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT +{ + return ymdl + dm; +} + +template +CONSTCD14 +inline +year_month_day_last +operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT +{ + return ymdl + (-dm); +} + +CONSTCD11 +inline +year_month_day_last +operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT +{ + return {ymdl.year()+dy, ymdl.month_day_last()}; +} + +CONSTCD11 +inline +year_month_day_last +operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT +{ + return ymdl + dy; +} + +CONSTCD11 +inline +year_month_day_last +operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT +{ + return ymdl + (-dy); +} + +// year_month_day + +CONSTCD11 +inline +year_month_day::year_month_day(const date::year& y, const date::month& m, + const date::day& d) NOEXCEPT + : y_(y) + , m_(m) + , d_(d) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT + : y_(ymdl.year()) + , m_(ymdl.month()) + , d_(ymdl.day()) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(sys_days dp) NOEXCEPT + : year_month_day(from_days(dp.time_since_epoch())) + {} + +CONSTCD14 +inline +year_month_day::year_month_day(local_days dp) NOEXCEPT + : year_month_day(from_days(dp.time_since_epoch())) + {} + +CONSTCD11 inline year year_month_day::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_day::month() const NOEXCEPT {return m_;} +CONSTCD11 inline day year_month_day::day() const NOEXCEPT {return d_;} + +template +CONSTCD14 +inline +year_month_day& +year_month_day::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +template +CONSTCD14 +inline +year_month_day& +year_month_day::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_day& +year_month_day::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD14 +inline +days +year_month_day::to_days() const NOEXCEPT +{ + static_assert(std::numeric_limits::digits >= 18, + "This algorithm has not been ported to a 16 bit unsigned integer"); + static_assert(std::numeric_limits::digits >= 20, + "This algorithm has not been ported to a 16 bit signed 
integer"); + auto const y = static_cast(y_) - (m_ <= February); + auto const m = static_cast(m_); + auto const d = static_cast(d_); + auto const era = (y >= 0 ? y : y-399) / 400; + auto const yoe = static_cast(y - era * 400); // [0, 399] + auto const doy = (153*(m > 2 ? m-3 : m+9) + 2)/5 + d-1; // [0, 365] + auto const doe = yoe * 365 + yoe/4 - yoe/100 + doy; // [0, 146096] + return days{era * 146097 + static_cast(doe) - 719468}; +} + +CONSTCD14 +inline +year_month_day::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_day::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD14 +inline +bool +year_month_day::ok() const NOEXCEPT +{ + if (!(y_.ok() && m_.ok())) + return false; + return date::day{1} <= d_ && d_ <= (y_ / m_ / last).day(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && x.day() == y.day(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(x == y); +} + +CONSTCD11 +inline +bool +operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return x.year() < y.year() ? true + : (x.year() > y.year() ? false + : (x.month() < y.month() ? true + : (x.month() > y.month() ? false + : (x.day() < y.day())))); +} + +CONSTCD11 +inline +bool +operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return y < x; +} + +CONSTCD11 +inline +bool +operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(y < x); +} + +CONSTCD11 +inline +bool +operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT +{ + return !(x < y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_day& ymd) +{ + detail::save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.imbue(std::locale::classic()); + os << static_cast(ymd.year()) << '-'; + os.width(2); + os << static_cast(ymd.month()) << '-'; + os.width(2); + os << static_cast(ymd.day()); + if (!ymd.ok()) + os << " is not a valid year_month_day"; + return os; +} + +CONSTCD14 +inline +year_month_day +year_month_day::from_days(days dp) NOEXCEPT +{ + static_assert(std::numeric_limits::digits >= 18, + "This algorithm has not been ported to a 16 bit unsigned integer"); + static_assert(std::numeric_limits::digits >= 20, + "This algorithm has not been ported to a 16 bit signed integer"); + auto const z = dp.count() + 719468; + auto const era = (z >= 0 ? z : z - 146096) / 146097; + auto const doe = static_cast(z - era * 146097); // [0, 146096] + auto const yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365; // [0, 399] + auto const y = static_cast(yoe) + era * 400; + auto const doy = doe - (365*yoe + yoe/4 - yoe/100); // [0, 365] + auto const mp = (5*doy + 2)/153; // [0, 11] + auto const d = doy - (153*mp+2)/5 + 1; // [1, 31] + auto const m = mp < 10 ? 
mp+3 : mp-9; // [1, 12] + return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)}; +} + +template +CONSTCD14 +inline +year_month_day +operator+(const year_month_day& ymd, const months& dm) NOEXCEPT +{ + return (ymd.year() / ymd.month() + dm) / ymd.day(); +} + +template +CONSTCD14 +inline +year_month_day +operator+(const months& dm, const year_month_day& ymd) NOEXCEPT +{ + return ymd + dm; +} + +template +CONSTCD14 +inline +year_month_day +operator-(const year_month_day& ymd, const months& dm) NOEXCEPT +{ + return ymd + (-dm); +} + +CONSTCD11 +inline +year_month_day +operator+(const year_month_day& ymd, const years& dy) NOEXCEPT +{ + return (ymd.year() + dy) / ymd.month() / ymd.day(); +} + +CONSTCD11 +inline +year_month_day +operator+(const years& dy, const year_month_day& ymd) NOEXCEPT +{ + return ymd + dy; +} + +CONSTCD11 +inline +year_month_day +operator-(const year_month_day& ymd, const years& dy) NOEXCEPT +{ + return ymd + (-dy); +} + +// year_month_weekday + +CONSTCD11 +inline +year_month_weekday::year_month_weekday(const date::year& y, const date::month& m, + const date::weekday_indexed& wdi) + NOEXCEPT + : y_(y) + , m_(m) + , wdi_(wdi) + {} + +CONSTCD14 +inline +year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT + : year_month_weekday(from_days(dp.time_since_epoch())) + {} + +CONSTCD14 +inline +year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT + : year_month_weekday(from_days(dp.time_since_epoch())) + {} + +template +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +template +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_weekday& +year_month_weekday::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_weekday::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_weekday::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday +year_month_weekday::weekday() const NOEXCEPT +{ + return wdi_.weekday(); +} + +CONSTCD11 +inline +unsigned +year_month_weekday::index() const NOEXCEPT +{ + return wdi_.index(); +} + +CONSTCD11 +inline +weekday_indexed +year_month_weekday::weekday_indexed() const NOEXCEPT +{ + return wdi_; +} + +CONSTCD14 +inline +year_month_weekday::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_weekday::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD14 +inline +bool +year_month_weekday::ok() const NOEXCEPT +{ + if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || wdi_.index() < 1) + return false; + if (wdi_.index() <= 4) + return true; + auto d2 = wdi_.weekday() - date::weekday(static_cast(y_/m_/1)) + + days((wdi_.index()-1)*7 + 1); + return static_cast(d2.count()) <= static_cast((y_/m_/last).day()); +} + +CONSTCD14 +inline +year_month_weekday +year_month_weekday::from_days(days d) NOEXCEPT +{ + sys_days dp{d}; + auto const wd = date::weekday(dp); + auto const ymd = year_month_day(dp); + return {ymd.year(), ymd.month(), wd[(static_cast(ymd.day())-1)/7+1]}; +} + +CONSTCD14 +inline +days +year_month_weekday::to_days() const NOEXCEPT +{ + auto d = sys_days(y_/m_/1); + return (d + 
(wdi_.weekday() - date::weekday(d) + days{(wdi_.index()-1)*7}) + ).time_since_epoch(); +} + +CONSTCD11 +inline +bool +operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && + x.weekday_indexed() == y.weekday_indexed(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday& ymwdi) +{ + detail::low_level_fmt(os, ymwdi.year()) << '/'; + detail::low_level_fmt(os, ymwdi.month()) << '/'; + detail::low_level_fmt(os, ymwdi.weekday_indexed()); + if (!ymwdi.ok()) + os << " is not a valid year_month_weekday"; + return os; +} + +template +CONSTCD14 +inline +year_month_weekday +operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT +{ + return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed(); +} + +template +CONSTCD14 +inline +year_month_weekday +operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT +{ + return ymwd + dm; +} + +template +CONSTCD14 +inline +year_month_weekday +operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT +{ + return ymwd + (-dm); +} + +CONSTCD11 +inline +year_month_weekday +operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT +{ + return {ymwd.year()+dy, ymwd.month(), ymwd.weekday_indexed()}; +} + +CONSTCD11 +inline +year_month_weekday +operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT +{ + return ymwd + dy; +} + +CONSTCD11 +inline +year_month_weekday +operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT +{ + return ymwd + (-dy); +} + +// year_month_weekday_last + +CONSTCD11 +inline +year_month_weekday_last::year_month_weekday_last(const date::year& y, + const date::month& m, + const date::weekday_last& wdl) NOEXCEPT + : y_(y) + , m_(m) + , wdl_(wdl) + {} + +template +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator+=(const months& m) NOEXCEPT +{ + *this = *this + m; + return *this; +} + +template +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator-=(const months& m) NOEXCEPT +{ + *this = *this - m; + return *this; +} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator+=(const years& y) NOEXCEPT +{ + *this = *this + y; + return *this; +} + +CONSTCD14 +inline +year_month_weekday_last& +year_month_weekday_last::operator-=(const years& y) NOEXCEPT +{ + *this = *this - y; + return *this; +} + +CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT {return y_;} +CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT {return m_;} + +CONSTCD11 +inline +weekday +year_month_weekday_last::weekday() const NOEXCEPT +{ + return wdl_.weekday(); +} + +CONSTCD11 +inline +weekday_last +year_month_weekday_last::weekday_last() const NOEXCEPT +{ + return wdl_; +} + +CONSTCD14 +inline +year_month_weekday_last::operator sys_days() const NOEXCEPT +{ + return sys_days{to_days()}; +} + +CONSTCD14 +inline +year_month_weekday_last::operator local_days() const NOEXCEPT +{ + return local_days{to_days()}; +} + +CONSTCD11 +inline +bool +year_month_weekday_last::ok() const NOEXCEPT +{ + return y_.ok() && m_.ok() && wdl_.ok(); +} + +CONSTCD14 +inline +days +year_month_weekday_last::to_days() const NOEXCEPT +{ + auto const d = sys_days(y_/m_/last); + return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch(); +} + 
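Editor's note: the block below is an illustrative sketch, not part of the vendored header or this patch. Assuming the enclosing namespace is date, it shows how the weekday-indexed calendar types defined above (year_month_weekday, year_month_weekday_last) resolve to concrete civil dates once converted through sys_days.

// Illustrative only -- resolving "second Sunday of May 2023" and "last Friday of
// May 2023" with the constructors and sys_days conversions defined above
// (valid in a C++14 constexpr context):
//
//   using namespace date;
//   constexpr year_month_weekday      mothers_day{year{2023}, May, Sunday[2]};
//   constexpr year_month_weekday_last last_fri   {year{2023}, May, Friday[last]};
//   static_assert(year_month_day{sys_days{mothers_day}} ==
//                 year_month_day{year{2023}, May, day{14}}, "");
//   static_assert(year_month_day{sys_days{last_fri}} ==
//                 year_month_day{year{2023}, May, day{26}}, "");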
+CONSTCD11 +inline +bool +operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT +{ + return x.year() == y.year() && x.month() == y.month() && + x.weekday_last() == y.weekday_last(); +} + +CONSTCD11 +inline +bool +operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT +{ + return !(x == y); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const year_month_weekday_last& ymwdl) +{ + detail::low_level_fmt(os, ymwdl.year()) << '/'; + detail::low_level_fmt(os, ymwdl.month()) << '/'; + detail::low_level_fmt(os, ymwdl.weekday_last()); + if (!ymwdl.ok()) + os << " is not a valid year_month_weekday_last"; + return os; +} + +template +CONSTCD14 +inline +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT +{ + return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last(); +} + +template +CONSTCD14 +inline +year_month_weekday_last +operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT +{ + return ymwdl + dm; +} + +template +CONSTCD14 +inline +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT +{ + return ymwdl + (-dm); +} + +CONSTCD11 +inline +year_month_weekday_last +operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT +{ + return {ymwdl.year()+dy, ymwdl.month(), ymwdl.weekday_last()}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT +{ + return ymwdl + dy; +} + +CONSTCD11 +inline +year_month_weekday_last +operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT +{ + return ymwdl + (-dy); +} + +// year_month from operator/() + +CONSTCD11 +inline +year_month +operator/(const year& y, const month& m) NOEXCEPT +{ + return {y, m}; +} + +CONSTCD11 +inline +year_month +operator/(const year& y, int m) NOEXCEPT +{ + return y / month(static_cast(m)); +} + +// month_day from operator/() + +CONSTCD11 +inline +month_day +operator/(const month& m, const day& d) NOEXCEPT +{ + return {m, d}; +} + +CONSTCD11 +inline +month_day +operator/(const day& d, const month& m) NOEXCEPT +{ + return m / d; +} + +CONSTCD11 +inline +month_day +operator/(const month& m, int d) NOEXCEPT +{ + return m / day(static_cast(d)); +} + +CONSTCD11 +inline +month_day +operator/(int m, const day& d) NOEXCEPT +{ + return month(static_cast(m)) / d; +} + +CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT {return m / d;} + +// month_day_last from operator/() + +CONSTCD11 +inline +month_day_last +operator/(const month& m, last_spec) NOEXCEPT +{ + return month_day_last{m}; +} + +CONSTCD11 +inline +month_day_last +operator/(last_spec, const month& m) NOEXCEPT +{ + return m/last; +} + +CONSTCD11 +inline +month_day_last +operator/(int m, last_spec) NOEXCEPT +{ + return month(static_cast(m))/last; +} + +CONSTCD11 +inline +month_day_last +operator/(last_spec, int m) NOEXCEPT +{ + return m/last; +} + +// month_weekday from operator/() + +CONSTCD11 +inline +month_weekday +operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT +{ + return {m, wdi}; +} + +CONSTCD11 +inline +month_weekday +operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT +{ + return m / wdi; +} + +CONSTCD11 +inline +month_weekday +operator/(int m, const weekday_indexed& wdi) NOEXCEPT +{ + return month(static_cast(m)) / wdi; +} + +CONSTCD11 +inline +month_weekday +operator/(const weekday_indexed& wdi, int m) NOEXCEPT +{ + return 
m / wdi; +} + +// month_weekday_last from operator/() + +CONSTCD11 +inline +month_weekday_last +operator/(const month& m, const weekday_last& wdl) NOEXCEPT +{ + return {m, wdl}; +} + +CONSTCD11 +inline +month_weekday_last +operator/(const weekday_last& wdl, const month& m) NOEXCEPT +{ + return m / wdl; +} + +CONSTCD11 +inline +month_weekday_last +operator/(int m, const weekday_last& wdl) NOEXCEPT +{ + return month(static_cast(m)) / wdl; +} + +CONSTCD11 +inline +month_weekday_last +operator/(const weekday_last& wdl, int m) NOEXCEPT +{ + return m / wdl; +} + +// year_month_day from operator/() + +CONSTCD11 +inline +year_month_day +operator/(const year_month& ym, const day& d) NOEXCEPT +{ + return {ym.year(), ym.month(), d}; +} + +CONSTCD11 +inline +year_month_day +operator/(const year_month& ym, int d) NOEXCEPT +{ + return ym / day(static_cast(d)); +} + +CONSTCD11 +inline +year_month_day +operator/(const year& y, const month_day& md) NOEXCEPT +{ + return y / md.month() / md.day(); +} + +CONSTCD11 +inline +year_month_day +operator/(int y, const month_day& md) NOEXCEPT +{ + return year(y) / md; +} + +CONSTCD11 +inline +year_month_day +operator/(const month_day& md, const year& y) NOEXCEPT +{ + return y / md; +} + +CONSTCD11 +inline +year_month_day +operator/(const month_day& md, int y) NOEXCEPT +{ + return year(y) / md; +} + +// year_month_day_last from operator/() + +CONSTCD11 +inline +year_month_day_last +operator/(const year_month& ym, last_spec) NOEXCEPT +{ + return {ym.year(), month_day_last{ym.month()}}; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const year& y, const month_day_last& mdl) NOEXCEPT +{ + return {y, mdl}; +} + +CONSTCD11 +inline +year_month_day_last +operator/(int y, const month_day_last& mdl) NOEXCEPT +{ + return year(y) / mdl; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const month_day_last& mdl, const year& y) NOEXCEPT +{ + return y / mdl; +} + +CONSTCD11 +inline +year_month_day_last +operator/(const month_day_last& mdl, int y) NOEXCEPT +{ + return year(y) / mdl; +} + +// year_month_weekday from operator/() + +CONSTCD11 +inline +year_month_weekday +operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT +{ + return {ym.year(), ym.month(), wdi}; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const year& y, const month_weekday& mwd) NOEXCEPT +{ + return {y, mwd.month(), mwd.weekday_indexed()}; +} + +CONSTCD11 +inline +year_month_weekday +operator/(int y, const month_weekday& mwd) NOEXCEPT +{ + return year(y) / mwd; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const month_weekday& mwd, const year& y) NOEXCEPT +{ + return y / mwd; +} + +CONSTCD11 +inline +year_month_weekday +operator/(const month_weekday& mwd, int y) NOEXCEPT +{ + return year(y) / mwd; +} + +// year_month_weekday_last from operator/() + +CONSTCD11 +inline +year_month_weekday_last +operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT +{ + return {ym.year(), ym.month(), wdl}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT +{ + return {y, mwdl.month(), mwdl.weekday_last()}; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(int y, const month_weekday_last& mwdl) NOEXCEPT +{ + return year(y) / mwdl; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT +{ + return y / mwdl; +} + +CONSTCD11 +inline +year_month_weekday_last +operator/(const month_weekday_last& mwdl, int y) NOEXCEPT +{ + return year(y) / mwdl; 
+} + +template +struct fields; + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev = nullptr, + const std::chrono::seconds* offset_sec = nullptr); + +template +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr); + +// hh_mm_ss + +namespace detail +{ + +struct undocumented {explicit undocumented() = default;}; + +// width::value is the number of fractional decimal digits in 1/n +// width<0>::value and width<1>::value are defined to be 0 +// If 1/n takes more than 18 fractional decimal digits, +// the result is truncated to 19. +// Example: width<2>::value == 1 +// Example: width<3>::value == 19 +// Example: width<4>::value == 2 +// Example: width<10>::value == 1 +// Example: width<1000>::value == 3 +template +struct width +{ + static_assert(d > 0, "width called with zero denominator"); + static CONSTDATA unsigned value = 1 + width::value; +}; + +template +struct width +{ + static CONSTDATA unsigned value = 0; +}; + +template +struct static_pow10 +{ +private: + static CONSTDATA std::uint64_t h = static_pow10::value; +public: + static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1); +}; + +template <> +struct static_pow10<0> +{ + static CONSTDATA std::uint64_t value = 1; +}; + +template +class decimal_format_seconds +{ + using CT = typename std::common_type::type; + using rep = typename CT::rep; + static unsigned CONSTDATA trial_width = + detail::width::value; +public: + static unsigned CONSTDATA width = trial_width < 19 ? trial_width : 6u; + using precision = std::chrono::duration::value>>; + +private: + std::chrono::seconds s_; + precision sub_s_; + +public: + CONSTCD11 decimal_format_seconds() + : s_() + , sub_s_() + {} + + CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT + : s_(std::chrono::duration_cast(d)) + , sub_s_(std::chrono::duration_cast(d - s_)) + {} + + CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_;} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_;} + CONSTCD11 precision subseconds() const NOEXCEPT {return sub_s_;} + + CONSTCD14 precision to_duration() const NOEXCEPT + { + return s_ + sub_s_; + } + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1}; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const decimal_format_seconds& x) + { + return x.print(os, std::chrono::treat_as_floating_point{}); + } + + template + std::basic_ostream& + print(std::basic_ostream& os, std::true_type) const + { + date::detail::save_ostream _(os); + std::chrono::duration d = s_ + sub_s_; + if (d < std::chrono::seconds{10}) + os << '0'; + os.precision(width+6); + os << std::fixed << d.count(); + return os; + } + + template + std::basic_ostream& + print(std::basic_ostream& os, std::false_type) const + { + date::detail::save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << s_.count(); + if (width > 0) + { +#if !ONLY_C_LOCALE + os << std::use_facet>(os.getloc()).decimal_point(); +#else + os << '.'; +#endif + date::detail::save_ostream _s(os); + os.imbue(std::locale::classic()); + os.width(width); + os << sub_s_.count(); + } + return os; + } +}; + +template +inline +CONSTCD11 +typename std::enable_if + < + std::numeric_limits::is_signed, + std::chrono::duration + >::type 
+abs(std::chrono::duration d) +{ + return d >= d.zero() ? +d : -d; +} + +template +inline +CONSTCD11 +typename std::enable_if + < + !std::numeric_limits::is_signed, + std::chrono::duration + >::type +abs(std::chrono::duration d) +{ + return d; +} + +} // namespace detail + +template +class hh_mm_ss +{ + using dfs = detail::decimal_format_seconds::type>; + + std::chrono::hours h_; + std::chrono::minutes m_; + dfs s_; + bool neg_; + +public: + static unsigned CONSTDATA fractional_width = dfs::width; + using precision = typename dfs::precision; + + CONSTCD11 hh_mm_ss() NOEXCEPT + : hh_mm_ss(Duration::zero()) + {} + + CONSTCD11 explicit hh_mm_ss(Duration d) NOEXCEPT + : h_(std::chrono::duration_cast(detail::abs(d))) + , m_(std::chrono::duration_cast(detail::abs(d)) - h_) + , s_(detail::abs(d) - h_ - m_) + , neg_(d < Duration::zero()) + {} + + CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;} + CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;} + CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_.seconds();} + CONSTCD14 std::chrono::seconds& + seconds(detail::undocumented) NOEXCEPT {return s_.seconds();} + CONSTCD11 precision subseconds() const NOEXCEPT {return s_.subseconds();} + CONSTCD11 bool is_negative() const NOEXCEPT {return neg_;} + + CONSTCD11 explicit operator precision() const NOEXCEPT {return to_duration();} + CONSTCD11 precision to_duration() const NOEXCEPT + {return (s_.to_duration() + m_ + h_) * (1-2*neg_);} + + CONSTCD11 bool in_conventional_range() const NOEXCEPT + { + return !neg_ && h_ < days{1} && m_ < std::chrono::hours{1} && + s_.in_conventional_range(); + } + +private: + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, hh_mm_ss const& tod) + { + if (tod.is_negative()) + os << '-'; + if (tod.h_ < std::chrono::hours{10}) + os << '0'; + os << tod.h_.count() << ':'; + if (tod.m_ < std::chrono::minutes{10}) + os << '0'; + os << tod.m_.count() << ':' << tod.s_; + return os; + } + + template + friend + std::basic_ostream& + date::to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev, + const std::chrono::seconds* offset_sec); + + template + friend + std::basic_istream& + date::from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, + std::basic_string* abbrev, std::chrono::minutes* offset); +}; + +inline +CONSTCD14 +bool +is_am(std::chrono::hours const& h) NOEXCEPT +{ + using std::chrono::hours; + return hours{0} <= h && h < hours{12}; +} + +inline +CONSTCD14 +bool +is_pm(std::chrono::hours const& h) NOEXCEPT +{ + using std::chrono::hours; + return hours{12} <= h && h < hours{24}; +} + +inline +CONSTCD14 +std::chrono::hours +make12(std::chrono::hours h) NOEXCEPT +{ + using std::chrono::hours; + if (h < hours{12}) + { + if (h == hours{0}) + h = hours{12}; + } + else + { + if (h != hours{12}) + h = h - hours{12}; + } + return h; +} + +inline +CONSTCD14 +std::chrono::hours +make24(std::chrono::hours h, bool is_pm) NOEXCEPT +{ + using std::chrono::hours; + if (is_pm) + { + if (h != hours{12}) + h = h + hours{12}; + } + else if (h == hours{12}) + h = hours{0}; + return h; +} + +template +using time_of_day = hh_mm_ss; + +template +CONSTCD11 +inline +hh_mm_ss> +make_time(const std::chrono::duration& d) +{ + return hh_mm_ss>(d); +} + +template +inline +typename std::enable_if +< + !std::is_convertible::value, + std::basic_ostream& +>::type +operator<<(std::basic_ostream& os, const sys_time& tp) +{ + auto const dp = date::floor(tp); + return os << 
year_month_day(dp) << ' ' << make_time(tp-dp); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const sys_days& dp) +{ + return os << year_month_day(dp); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const local_time& ut) +{ + return (os << sys_time{ut.time_since_epoch()}); +} + +namespace detail +{ + +template +class string_literal; + +template +inline +CONSTCD14 +string_literal::type, + N1 + N2 - 1> +operator+(const string_literal& x, const string_literal& y) NOEXCEPT; + +template +class string_literal +{ + CharT p_[N]; + + CONSTCD11 string_literal() NOEXCEPT + : p_{} + {} + +public: + using const_iterator = const CharT*; + + string_literal(string_literal const&) = default; + string_literal& operator=(string_literal const&) = delete; + + template ::type> + CONSTCD11 string_literal(CharT c) NOEXCEPT + : p_{c} + { + } + + template ::type> + CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT + : p_{c1, c2} + { + } + + template ::type> + CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT + : p_{c1, c2, c3} + { + } + + CONSTCD14 string_literal(const CharT(&a)[N]) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + template ::type> + CONSTCD14 string_literal(const char(&a)[N]) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + template ::value>::type> + CONSTCD14 string_literal(string_literal const& a) NOEXCEPT + : p_{} + { + for (std::size_t i = 0; i < N; ++i) + p_[i] = a[i]; + } + + CONSTCD11 const CharT* data() const NOEXCEPT {return p_;} + CONSTCD11 std::size_t size() const NOEXCEPT {return N-1;} + + CONSTCD11 const_iterator begin() const NOEXCEPT {return p_;} + CONSTCD11 const_iterator end() const NOEXCEPT {return p_ + N-1;} + + CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT + { + return p_[n]; + } + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, const string_literal& s) + { + return os << s.p_; + } + + template + friend + CONSTCD14 + string_literal::type, + N1 + N2 - 1> + operator+(const string_literal& x, const string_literal& y) NOEXCEPT; +}; + +template +CONSTCD11 +inline +string_literal +operator+(const string_literal& x, const string_literal& y) NOEXCEPT +{ + return string_literal(x[0], y[0]); +} + +template +CONSTCD11 +inline +string_literal +operator+(const string_literal& x, const string_literal& y) NOEXCEPT +{ + return string_literal(x[0], x[1], y[0]); +} + +template +CONSTCD14 +inline +string_literal::type, + N1 + N2 - 1> +operator+(const string_literal& x, const string_literal& y) NOEXCEPT +{ + using CT = typename std::conditional::type; + + string_literal r; + std::size_t i = 0; + for (; i < N1-1; ++i) + r.p_[i] = CT(x.p_[i]); + for (std::size_t j = 0; j < N2; ++j, ++i) + r.p_[i] = CT(y.p_[j]); + + return r; +} + + +template +inline +std::basic_string +operator+(std::basic_string x, const string_literal& y) +{ + x.append(y.data(), y.size()); + return x; +} + +#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \ + && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150) + +template ::value || + std::is_same::value || + std::is_same::value || + std::is_same::value>> +CONSTCD14 +inline +string_literal +msl(CharT c) NOEXCEPT +{ + return string_literal{c}; +} + +CONSTCD14 +inline +std::size_t +to_string_len(std::intmax_t i) +{ + std::size_t r = 0; + do + { + i /= 10; + ++r; + } while (i > 0); + return r; +} + +template +CONSTCD14 +inline +std::enable_if_t +< + N < 
10, + string_literal +> +msl() NOEXCEPT +{ + return msl(char(N % 10 + '0')); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + 10 <= N, + string_literal +> +msl() NOEXCEPT +{ + return msl() + msl(char(N % 10 + '0')); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + std::ratio::type::den != 1, + string_literal::type::num) + + to_string_len(std::ratio::type::den) + 4> +> +msl(std::ratio) NOEXCEPT +{ + using R = typename std::ratio::type; + return msl(CharT{'['}) + msl() + msl(CharT{'/'}) + + msl() + msl(CharT{']'}); +} + +template +CONSTCD14 +inline +std::enable_if_t +< + std::ratio::type::den == 1, + string_literal::type::num) + 3> +> +msl(std::ratio) NOEXCEPT +{ + using R = typename std::ratio::type; + return msl(CharT{'['}) + msl() + msl(CharT{']'}); +} + + +#else // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) + +inline +std::string +to_string(std::uint64_t x) +{ + return std::to_string(x); +} + +template +inline +std::basic_string +to_string(std::uint64_t x) +{ + auto y = std::to_string(x); + return std::basic_string(y.begin(), y.end()); +} + +template +inline +typename std::enable_if +< + std::ratio::type::den != 1, + std::basic_string +>::type +msl(std::ratio) +{ + using R = typename std::ratio::type; + return std::basic_string(1, '[') + to_string(R::num) + CharT{'/'} + + to_string(R::den) + CharT{']'}; +} + +template +inline +typename std::enable_if +< + std::ratio::type::den == 1, + std::basic_string +>::type +msl(std::ratio) +{ + using R = typename std::ratio::type; + return std::basic_string(1, '[') + to_string(R::num) + CharT{']'}; +} + +#endif // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411) + +template +CONSTCD11 +inline +string_literal +msl(std::atto) NOEXCEPT +{ + return string_literal{'a'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::femto) NOEXCEPT +{ + return string_literal{'f'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::pico) NOEXCEPT +{ + return string_literal{'p'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::nano) NOEXCEPT +{ + return string_literal{'n'}; +} + +template +CONSTCD11 +inline +typename std::enable_if +< + std::is_same::value, + string_literal +>::type +msl(std::micro) NOEXCEPT +{ + return string_literal{'\xC2', '\xB5'}; +} + +template +CONSTCD11 +inline +typename std::enable_if +< + !std::is_same::value, + string_literal +>::type +msl(std::micro) NOEXCEPT +{ + return string_literal{CharT{static_cast('\xB5')}}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::milli) NOEXCEPT +{ + return string_literal{'m'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::centi) NOEXCEPT +{ + return string_literal{'c'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::deca) NOEXCEPT +{ + return string_literal{'d', 'a'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::deci) NOEXCEPT +{ + return string_literal{'d'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::hecto) NOEXCEPT +{ + return string_literal{'h'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::kilo) NOEXCEPT +{ + return string_literal{'k'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::mega) NOEXCEPT +{ + return string_literal{'M'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::giga) NOEXCEPT +{ + return string_literal{'G'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::tera) NOEXCEPT +{ + return string_literal{'T'}; +} + +template +CONSTCD11 +inline +string_literal 
+msl(std::peta) NOEXCEPT +{ + return string_literal{'P'}; +} + +template +CONSTCD11 +inline +string_literal +msl(std::exa) NOEXCEPT +{ + return string_literal{'E'}; +} + +template +CONSTCD11 +inline +auto +get_units(Period p) + -> decltype(msl(p) + string_literal{'s'}) +{ + return msl(p) + string_literal{'s'}; +} + +template +CONSTCD11 +inline +string_literal +get_units(std::ratio<1>) +{ + return string_literal{'s'}; +} + +template +CONSTCD11 +inline +string_literal +get_units(std::ratio<3600>) +{ + return string_literal{'h'}; +} + +template +CONSTCD11 +inline +string_literal +get_units(std::ratio<60>) +{ + return string_literal{'m', 'i', 'n'}; +} + +template +CONSTCD11 +inline +string_literal +get_units(std::ratio<86400>) +{ + return string_literal{'d'}; +} + +template > +struct make_string; + +template <> +struct make_string +{ + template + static + std::string + from(Rep n) + { + return std::to_string(n); + } +}; + +template +struct make_string +{ + template + static + std::basic_string + from(Rep n) + { + auto s = std::to_string(n); + return std::basic_string(s.begin(), s.end()); + } +}; + +template <> +struct make_string +{ + template + static + std::wstring + from(Rep n) + { + return std::to_wstring(n); + } +}; + +template +struct make_string +{ + template + static + std::basic_string + from(Rep n) + { + auto s = std::to_wstring(n); + return std::basic_string(s.begin(), s.end()); + } +}; + +} // namespace detail + +// to_stream + +CONSTDATA year nanyear{-32768}; + +template +struct fields +{ + year_month_day ymd{nanyear/0/0}; + weekday wd{8u}; + hh_mm_ss tod{}; + bool has_tod = false; + +#if !defined(__clang__) && defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ <= 409) + fields() : ymd{nanyear/0/0}, wd{8u}, tod{}, has_tod{false} {} +#else + fields() = default; +#endif + + fields(year_month_day ymd_) : ymd(ymd_) {} + fields(weekday wd_) : wd(wd_) {} + fields(hh_mm_ss tod_) : tod(tod_), has_tod(true) {} + + fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {} + fields(year_month_day ymd_, hh_mm_ss tod_) : ymd(ymd_), tod(tod_), + has_tod(true) {} + + fields(weekday wd_, hh_mm_ss tod_) : wd(wd_), tod(tod_), has_tod(true) {} + + fields(year_month_day ymd_, weekday wd_, hh_mm_ss tod_) + : ymd(ymd_) + , wd(wd_) + , tod(tod_) + , has_tod(true) + {} +}; + +namespace detail +{ + +template +unsigned +extract_weekday(std::basic_ostream& os, const fields& fds) +{ + if (!fds.ymd.ok() && !fds.wd.ok()) + { + // fds does not contain a valid weekday + os.setstate(std::ios::failbit); + return 8; + } + weekday wd; + if (fds.ymd.ok()) + { + wd = weekday{sys_days(fds.ymd)}; + if (fds.wd.ok() && wd != fds.wd) + { + // fds.ymd and fds.wd are inconsistent + os.setstate(std::ios::failbit); + return 8; + } + } + else + wd = fds.wd; + return static_cast((wd - Sunday).count()); +} + +template +unsigned +extract_month(std::basic_ostream& os, const fields& fds) +{ + if (!fds.ymd.month().ok()) + { + // fds does not contain a valid month + os.setstate(std::ios::failbit); + return 0; + } + return static_cast(fds.ymd.month()); +} + +} // namespace detail + +#if ONLY_C_LOCALE + +namespace detail +{ + +inline +std::pair +weekday_names() +{ + static const std::string nm[] = + { + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sun", + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat" + }; + return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +inline +std::pair +month_names() +{ + static const std::string nm[] = + { + "January", + "February", + 
"March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec" + }; + return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +inline +std::pair +ampm_names() +{ + static const std::string nm[] = + { + "AM", + "PM" + }; + return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0])); +} + +template +FwdIter +scan_keyword(std::basic_istream& is, FwdIter kb, FwdIter ke) +{ + size_t nkw = static_cast(std::distance(kb, ke)); + const unsigned char doesnt_match = '\0'; + const unsigned char might_match = '\1'; + const unsigned char does_match = '\2'; + unsigned char statbuf[100]; + unsigned char* status = statbuf; + std::unique_ptr stat_hold(0, free); + if (nkw > sizeof(statbuf)) + { + status = (unsigned char*)std::malloc(nkw); + if (status == nullptr) + throw std::bad_alloc(); + stat_hold.reset(status); + } + size_t n_might_match = nkw; // At this point, any keyword might match + size_t n_does_match = 0; // but none of them definitely do + // Initialize all statuses to might_match, except for "" keywords are does_match + unsigned char* st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (!ky->empty()) + *st = might_match; + else + { + *st = does_match; + --n_might_match; + ++n_does_match; + } + } + // While there might be a match, test keywords against the next CharT + for (size_t indx = 0; is && n_might_match > 0; ++indx) + { + // Peek at the next CharT but don't consume it + auto ic = is.peek(); + if (ic == EOF) + { + is.setstate(std::ios::eofbit); + break; + } + auto c = static_cast(toupper(static_cast(ic))); + bool consume = false; + // For each keyword which might match, see if the indx character is c + // If a match if found, consume c + // If a match is found, and that is the last character in the keyword, + // then that keyword matches. + // If the keyword doesn't match this character, then change the keyword + // to doesn't match + st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (*st == might_match) + { + if (c == static_cast(toupper(static_cast((*ky)[indx])))) + { + consume = true; + if (ky->size() == indx+1) + { + *st = does_match; + --n_might_match; + ++n_does_match; + } + } + else + { + *st = doesnt_match; + --n_might_match; + } + } + } + // consume if we matched a character + if (consume) + { + (void)is.get(); + // If we consumed a character and there might be a matched keyword that + // was marked matched on a previous iteration, then such keywords + // are now marked as not matching. + if (n_might_match + n_does_match > 1) + { + st = status; + for (auto ky = kb; ky != ke; ++ky, ++st) + { + if (*st == does_match && ky->size() != indx+1) + { + *st = doesnt_match; + --n_does_match; + } + } + } + } + } + // We've exited the loop because we hit eof and/or we have no more "might matches". 
+ // Return the first matching result + for (st = status; kb != ke; ++kb, ++st) + if (*st == does_match) + break; + if (kb == ke) + is.setstate(std::ios::failbit); + return kb; +} + +} // namespace detail + +#endif // ONLY_C_LOCALE + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const fields& fds, const std::string* abbrev, + const std::chrono::seconds* offset_sec) +{ +#if ONLY_C_LOCALE + using detail::weekday_names; + using detail::month_names; + using detail::ampm_names; +#endif + using detail::save_ostream; + using detail::get_units; + using detail::extract_weekday; + using detail::extract_month; + using std::ios; + using std::chrono::duration_cast; + using std::chrono::seconds; + using std::chrono::minutes; + using std::chrono::hours; + date::detail::save_ostream ss(os); + os.fill(' '); + os.flags(std::ios::skipws | std::ios::dec); + os.width(0); + tm tm{}; + bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero(); +#if !ONLY_C_LOCALE + auto& facet = std::use_facet>(os.getloc()); +#endif + const CharT* command = nullptr; + CharT modified = CharT{}; + for (; *fmt; ++fmt) + { + switch (*fmt) + { + case 'a': + case 'A': + if (command) + { + if (modified == CharT{}) + { + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); +#else // ONLY_C_LOCALE + os << weekday_names().first[tm.tm_wday+7*(*fmt == 'a')]; +#endif // ONLY_C_LOCALE + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'b': + case 'B': + case 'h': + if (command) + { + if (modified == CharT{}) + { + tm.tm_mon = static_cast(extract_month(os, fds)) - 1; +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); +#else // ONLY_C_LOCALE + os << month_names().first[tm.tm_mon+12*(*fmt != 'B')]; +#endif // ONLY_C_LOCALE + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'c': + case 'x': + if (command) + { + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.ok()) + os.setstate(std::ios::failbit); + if (*fmt == 'c' && !fds.has_tod) + os.setstate(std::ios::failbit); +#if !ONLY_C_LOCALE + tm = std::tm{}; + auto const& ymd = fds.ymd; + auto ld = local_days(ymd); + if (*fmt == 'c') + { + tm.tm_sec = static_cast(fds.tod.seconds().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_hour = static_cast(fds.tod.hours().count()); + } + tm.tm_mday = static_cast(static_cast(ymd.day())); + tm.tm_mon = static_cast(extract_month(os, fds) - 1); + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + CharT f[3] = {'%'}; + auto fe = std::begin(f) + 1; + if (modified == CharT{'E'}) + *fe++ = modified; + *fe++ = *fmt; + facet.put(os, os, os.fill(), &tm, std::begin(f), fe); +#else // ONLY_C_LOCALE + if (*fmt == 'c') + { + auto wd = static_cast(extract_weekday(os, fds)); + os << weekday_names().first[static_cast(wd)+7] + << ' '; + os << month_names().first[extract_month(os, fds)-1+12] << ' '; + auto d = static_cast(static_cast(fds.ymd.day())); + if (d < 10) + os << ' '; + os << d << ' ' + << 
make_time(duration_cast(fds.tod.to_duration())) + << ' ' << fds.ymd.year(); + + } + else // *fmt == 'x' + { + auto const& ymd = fds.ymd; + save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(ymd.month()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.day()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.year()) % 100; + } +#endif // ONLY_C_LOCALE + } + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'C': + if (command) + { + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.year().ok()) + os.setstate(std::ios::failbit); + auto y = static_cast(fds.ymd.year()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + if (y >= 0) + { + os.width(2); + os << y/100; + } + else + { + os << CharT{'-'}; + os.width(2); + os << -(y-99)/100; + } + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'E'}) + { + tm.tm_year = y - 1900; + CharT f[3] = {'%', 'E', 'C'}; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'd': + case 'e': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.day().ok()) + os.setstate(std::ios::failbit); + auto d = static_cast(static_cast(fds.ymd.day())); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + save_ostream _(os); + if (*fmt == CharT{'d'}) + os.fill('0'); + else + os.fill(' '); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << d; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + tm.tm_mday = d; + CharT f[3] = {'%', 'O', *fmt}; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'D': + if (command) + { + if (modified == CharT{}) + { + if (!fds.ymd.ok()) + os.setstate(std::ios::failbit); + auto const& ymd = fds.ymd; + save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(2); + os << static_cast(ymd.month()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.day()) << CharT{'/'}; + os.width(2); + os << static_cast(ymd.year()) % 100; + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'F': + if (command) + { + if (modified == CharT{}) + { + if (!fds.ymd.ok()) + os.setstate(std::ios::failbit); + auto const& ymd = fds.ymd; + save_ostream _(os); + os.imbue(std::locale::classic()); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(4); + os << static_cast(ymd.year()) << CharT{'-'}; + os.width(2); + os << static_cast(ymd.month()) << CharT{'-'}; + os.width(2); + os << static_cast(ymd.day()); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'g': + case 'G': + if (command) + { + if (modified == CharT{}) + { + if (!fds.ymd.ok()) + os.setstate(std::ios::failbit); + auto ld = local_days(fds.ymd); + auto y = year_month_day{ld + days{3}}.year(); + auto start = local_days((y-years{1})/December/Thursday[last]) + + (Monday-Thursday); + if (ld < start) + --y; + if (*fmt == CharT{'G'}) + os << y; + else + { + save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | 
std::ios::right); + os.width(2); + os << std::abs(static_cast(y)) % 100; + } + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'H': + case 'I': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + if (insert_negative) + { + os << '-'; + insert_negative = false; + } + auto hms = fds.tod; +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + auto h = *fmt == CharT{'I'} ? date::make12(hms.hours()) : hms.hours(); + if (h < hours{10}) + os << CharT{'0'}; + os << h.count(); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_hour = static_cast(hms.hours().count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'j': + if (command) + { + if (modified == CharT{}) + { + if (fds.ymd.ok() || fds.has_tod) + { + days doy; + if (fds.ymd.ok()) + { + auto ld = local_days(fds.ymd); + auto y = fds.ymd.year(); + doy = ld - local_days(y/January/1) + days{1}; + } + else + { + doy = duration_cast(fds.tod.to_duration()); + } + save_ostream _(os); + os.fill('0'); + os.flags(std::ios::dec | std::ios::right); + os.width(3); + os << doy.count(); + } + else + { + os.setstate(std::ios::failbit); + } + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'm': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.month().ok()) + os.setstate(std::ios::failbit); + auto m = static_cast(fds.ymd.month()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + if (m < 10) + os << CharT{'0'}; + os << m; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_mon = static_cast(m-1); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'M': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + if (insert_negative) + { + os << '-'; + insert_negative = false; + } +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + if (fds.tod.minutes() < minutes{10}) + os << CharT{'0'}; + os << fds.tod.minutes().count(); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_min = static_cast(fds.tod.minutes().count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'n': + if (command) + { + if (modified == CharT{}) + os << CharT{'\n'}; + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'p': + if (command) + { + if (modified == CharT{}) + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + tm.tm_hour = static_cast(fds.tod.hours().count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); +#else + if (date::is_am(fds.tod.hours())) + os << ampm_names().first[0]; + else + os << ampm_names().first[1]; +#endif + } + 
else + { + os << CharT{'%'} << modified << *fmt; + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'Q': + case 'q': + if (command) + { + if (modified == CharT{}) + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + auto d = fds.tod.to_duration(); + if (*fmt == 'q') + os << get_units(typename decltype(d)::period::type{}); + else + os << d.count(); + } + else + { + os << CharT{'%'} << modified << *fmt; + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'r': + if (command) + { + if (modified == CharT{}) + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); +#if !ONLY_C_LOCALE + const CharT f[] = {'%', *fmt}; + tm.tm_hour = static_cast(fds.tod.hours().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_sec = static_cast(fds.tod.seconds().count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); +#else + hh_mm_ss tod(duration_cast(fds.tod.to_duration())); + save_ostream _(os); + os.fill('0'); + os.width(2); + os << date::make12(tod.hours()).count() << CharT{':'}; + os.width(2); + os << tod.minutes().count() << CharT{':'}; + os.width(2); + os << tod.seconds().count() << CharT{' '}; + if (date::is_am(tod.hours())) + os << ampm_names().first[0]; + else + os << ampm_names().first[1]; +#endif + } + else + { + os << CharT{'%'} << modified << *fmt; + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'R': + if (command) + { + if (modified == CharT{}) + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + if (fds.tod.hours() < hours{10}) + os << CharT{'0'}; + os << fds.tod.hours().count() << CharT{':'}; + if (fds.tod.minutes() < minutes{10}) + os << CharT{'0'}; + os << fds.tod.minutes().count(); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'S': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + if (insert_negative) + { + os << '-'; + insert_negative = false; + } +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + os << fds.tod.s_; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_sec = static_cast(fds.tod.s_.seconds().count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 't': + if (command) + { + if (modified == CharT{}) + os << CharT{'\t'}; + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'T': + if (command) + { + if (modified == CharT{}) + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); + os << fds.tod; + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'u': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + auto wd = extract_weekday(os, fds); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + os << (wd != 0 ? 
wd : 7u); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_wday = static_cast(wd); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'U': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + auto const& ymd = fds.ymd; + if (!ymd.ok()) + os.setstate(std::ios::failbit); + auto ld = local_days(ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + auto st = local_days(Sunday[1]/January/ymd.year()); + if (ld < st) + os << CharT{'0'} << CharT{'0'}; + else + { + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; + } + } + #if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'V': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.ok()) + os.setstate(std::ios::failbit); + auto ld = local_days(fds.ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + auto y = year_month_day{ld + days{3}}.year(); + auto st = local_days((y-years{1})/12/Thursday[last]) + + (Monday-Thursday); + if (ld < st) + { + --y; + st = local_days((y - years{1})/12/Thursday[last]) + + (Monday-Thursday); + } + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + auto const& ymd = fds.ymd; + tm.tm_year = static_cast(ymd.year()) - 1900; + tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'w': + if (command) + { + auto wd = extract_weekday(os, fds); + if (os.fail()) + return os; +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + os << wd; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_wday = static_cast(wd); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + else + { + os << CharT{'%'} << modified << *fmt; + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'W': + if (command) + { + if (modified == CharT{'E'}) + os << CharT{'%'} << modified << *fmt; + else + { + auto const& ymd = fds.ymd; + if (!ymd.ok()) + os.setstate(std::ios::failbit); + auto ld = local_days(ymd); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + auto st = local_days(Monday[1]/January/ymd.year()); + if (ld < st) + os << CharT{'0'} << CharT{'0'}; + else + { + auto wn = duration_cast(ld - st).count() + 1; + if (wn < 10) + os << CharT{'0'}; + os << wn; + } + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(ymd.year()) - 1900; + 
tm.tm_wday = static_cast(extract_weekday(os, fds)); + if (os.fail()) + return os; + tm.tm_yday = static_cast((ld - local_days(ymd.year()/1/1)).count()); + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'X': + if (command) + { + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.has_tod) + os.setstate(std::ios::failbit); +#if !ONLY_C_LOCALE + tm = std::tm{}; + tm.tm_sec = static_cast(fds.tod.seconds().count()); + tm.tm_min = static_cast(fds.tod.minutes().count()); + tm.tm_hour = static_cast(fds.tod.hours().count()); + CharT f[3] = {'%'}; + auto fe = std::begin(f) + 1; + if (modified == CharT{'E'}) + *fe++ = modified; + *fe++ = *fmt; + facet.put(os, os, os.fill(), &tm, std::begin(f), fe); +#else + os << fds.tod; +#endif + } + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'y': + if (command) + { + if (!fds.ymd.year().ok()) + os.setstate(std::ios::failbit); + auto y = static_cast(fds.ymd.year()); +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + y = std::abs(y) % 100; + if (y < 10) + os << CharT{'0'}; + os << y; +#if !ONLY_C_LOCALE + } + else + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = y - 1900; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'Y': + if (command) + { + if (modified == CharT{'O'}) + os << CharT{'%'} << modified << *fmt; + else + { + if (!fds.ymd.year().ok()) + os.setstate(std::ios::failbit); + auto y = fds.ymd.year(); +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + save_ostream _(os); + os.imbue(std::locale::classic()); + os << y; + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'E'}) + { + const CharT f[] = {'%', modified, *fmt}; + tm.tm_year = static_cast(y) - 1900; + facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f)); + } +#endif + } + modified = CharT{}; + command = nullptr; + } + else + os << *fmt; + break; + case 'z': + if (command) + { + if (offset_sec == nullptr) + { + // Can not format %z with unknown offset + os.setstate(ios::failbit); + return os; + } + auto m = duration_cast(*offset_sec); + auto neg = m < minutes{0}; + m = date::abs(m); + auto h = duration_cast(m); + m -= h; + if (neg) + os << CharT{'-'}; + else + os << CharT{'+'}; + if (h < hours{10}) + os << CharT{'0'}; + os << h.count(); + if (modified != CharT{}) + os << CharT{':'}; + if (m < minutes{10}) + os << CharT{'0'}; + os << m.count(); + command = nullptr; + modified = CharT{}; + } + else + os << *fmt; + break; + case 'Z': + if (command) + { + if (modified == CharT{}) + { + if (abbrev == nullptr) + { + // Can not format %Z with unknown time_zone + os.setstate(ios::failbit); + return os; + } + for (auto c : *abbrev) + os << CharT(c); + } + else + { + os << CharT{'%'} << modified << *fmt; + modified = CharT{}; + } + command = nullptr; + } + else + os << *fmt; + break; + case 'E': + case 'O': + if (command) + { + if (modified == CharT{}) + { + modified = *fmt; + } + else + { + os << CharT{'%'} << modified << *fmt; + command = nullptr; + modified = CharT{}; + } + } + else + os << *fmt; + break; + case '%': + if (command) + { + if (modified == CharT{}) + { + os << CharT{'%'}; + command = nullptr; + } + else + { + os << CharT{'%'} << modified << CharT{'%'}; + command = nullptr; + modified = CharT{}; + } + } + else + command = fmt; + break; + default: + if 
(command) + { + os << CharT{'%'}; + command = nullptr; + } + if (modified != CharT{}) + { + os << modified; + modified = CharT{}; + } + os << *fmt; + break; + } + } + if (command) + os << CharT{'%'}; + if (modified != CharT{}) + os << modified; + return os; +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const year& y) +{ + using CT = std::chrono::seconds; + fields fds{y/0/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const month& m) +{ + using CT = std::chrono::seconds; + fields fds{m/0/nanyear}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const day& d) +{ + using CT = std::chrono::seconds; + fields fds{d/0/nanyear}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const weekday& wd) +{ + using CT = std::chrono::seconds; + fields fds{wd}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const year_month& ym) +{ + using CT = std::chrono::seconds; + fields fds{ym/0}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, const month_day& md) +{ + using CT = std::chrono::seconds; + fields fds{md/nanyear}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const year_month_day& ymd) +{ + using CT = std::chrono::seconds; + fields fds{ymd}; + return to_stream(os, fmt, fds); +} + +template +inline +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const std::chrono::duration& d) +{ + using Duration = std::chrono::duration; + using CT = typename std::common_type::type; + fields fds{hh_mm_ss{d}}; + return to_stream(os, fmt, fds); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const local_time& tp, const std::string* abbrev = nullptr, + const std::chrono::seconds* offset_sec = nullptr) +{ + using CT = typename std::common_type::type; + auto ld = std::chrono::time_point_cast(tp); + fields fds; + if (ld <= tp) + fds = fields{year_month_day{ld}, hh_mm_ss{tp-local_seconds{ld}}}; + else + fds = fields{year_month_day{ld - days{1}}, + hh_mm_ss{days{1} - (local_seconds{ld} - tp)}}; + return to_stream(os, fmt, fds, abbrev, offset_sec); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const sys_time& tp) +{ + using std::chrono::seconds; + using CT = typename std::common_type::type; + const std::string abbrev("UTC"); + CONSTDATA seconds offset{0}; + auto sd = std::chrono::time_point_cast(tp); + fields fds; + if (sd <= tp) + fds = fields{year_month_day{sd}, hh_mm_ss{tp-sys_seconds{sd}}}; + else + fds = fields{year_month_day{sd - days{1}}, + hh_mm_ss{days{1} - (sys_seconds{sd} - tp)}}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +// format + +template +auto +format(const std::locale& loc, const CharT* fmt, const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt, tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + os.imbue(loc); + to_stream(os, fmt, tp); + return os.str(); +} + +template +auto +format(const CharT* fmt, const Streamable& tp) + -> 
decltype(to_stream(std::declval&>(), fmt, tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + to_stream(os, fmt, tp); + return os.str(); +} + +template +auto +format(const std::locale& loc, const std::basic_string& fmt, + const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt.c_str(), tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + os.imbue(loc); + to_stream(os, fmt.c_str(), tp); + return os.str(); +} + +template +auto +format(const std::basic_string& fmt, const Streamable& tp) + -> decltype(to_stream(std::declval&>(), fmt.c_str(), tp), + std::basic_string{}) +{ + std::basic_ostringstream os; + os.exceptions(std::ios::failbit | std::ios::badbit); + to_stream(os, fmt.c_str(), tp); + return os.str(); +} + +// parse + +namespace detail +{ + +template +bool +read_char(std::basic_istream& is, CharT fmt, std::ios::iostate& err) +{ + auto ic = is.get(); + if (Traits::eq_int_type(ic, Traits::eof()) || + !Traits::eq(Traits::to_char_type(ic), fmt)) + { + err |= std::ios::failbit; + is.setstate(std::ios::failbit); + return false; + } + return true; +} + +template +unsigned +read_unsigned(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + unsigned x = 0; + unsigned count = 0; + while (true) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + break; + auto c = static_cast(Traits::to_char_type(ic)); + if (!('0' <= c && c <= '9')) + break; + (void)is.get(); + ++count; + x = 10*x + static_cast(c - '0'); + if (count == M) + break; + } + if (count < m) + is.setstate(std::ios::failbit); + return x; +} + +template +int +read_signed(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + auto ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if (('0' <= c && c <= '9') || c == '-' || c == '+') + { + if (c == '-' || c == '+') + (void)is.get(); + auto x = static_cast(read_unsigned(is, std::max(m, 1u), M)); + if (!is.fail()) + { + if (c == '-') + x = -x; + return x; + } + } + } + if (m > 0) + is.setstate(std::ios::failbit); + return 0; +} + +template +long double +read_long_double(std::basic_istream& is, unsigned m = 1, unsigned M = 10) +{ + unsigned count = 0; + unsigned fcount = 0; + unsigned long long i = 0; + unsigned long long f = 0; + bool parsing_fraction = false; +#if ONLY_C_LOCALE + typename Traits::int_type decimal_point = '.'; +#else + auto decimal_point = Traits::to_int_type( + std::use_facet>(is.getloc()).decimal_point()); +#endif + while (true) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + break; + if (Traits::eq_int_type(ic, decimal_point)) + { + decimal_point = Traits::eof(); + parsing_fraction = true; + } + else + { + auto c = static_cast(Traits::to_char_type(ic)); + if (!('0' <= c && c <= '9')) + break; + if (!parsing_fraction) + { + i = 10*i + static_cast(c - '0'); + } + else + { + f = 10*f + static_cast(c - '0'); + ++fcount; + } + } + (void)is.get(); + if (++count == M) + break; + } + if (count < m) + { + is.setstate(std::ios::failbit); + return 0; + } + return static_cast(i) + static_cast(f)/std::pow(10.L, fcount); +} + +struct rs +{ + int& i; + unsigned m; + unsigned M; +}; + +struct ru +{ + int& i; + unsigned m; + unsigned M; +}; + +struct rld +{ + long double& i; + unsigned m; + unsigned M; +}; + +template +void +read(std::basic_istream&) +{ +} + +template +void +read(std::basic_istream& is, CharT a0, 
Args&& ...args); + +template +void +read(std::basic_istream& is, rs a0, Args&& ...args); + +template +void +read(std::basic_istream& is, ru a0, Args&& ...args); + +template +void +read(std::basic_istream& is, int a0, Args&& ...args); + +template +void +read(std::basic_istream& is, rld a0, Args&& ...args); + +template +void +read(std::basic_istream& is, CharT a0, Args&& ...args) +{ + // No-op if a0 == CharT{} + if (a0 != CharT{}) + { + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + { + is.setstate(std::ios::failbit | std::ios::eofbit); + return; + } + if (!Traits::eq(Traits::to_char_type(ic), a0)) + { + is.setstate(std::ios::failbit); + return; + } + (void)is.get(); + } + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, rs a0, Args&& ...args) +{ + auto x = read_signed(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = x; + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, ru a0, Args&& ...args) +{ + auto x = read_unsigned(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = static_cast(x); + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, int a0, Args&& ...args) +{ + if (a0 != -1) + { + auto u = static_cast(a0); + CharT buf[std::numeric_limits::digits10+2u] = {}; + auto e = buf; + do + { + *e++ = static_cast(CharT(u % 10) + CharT{'0'}); + u /= 10; + } while (u > 0); + std::reverse(buf, e); + for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) + read(is, *p); + } + if (is.rdstate() == std::ios::goodbit) + read(is, std::forward(args)...); +} + +template +void +read(std::basic_istream& is, rld a0, Args&& ...args) +{ + auto x = read_long_double(is, a0.m, a0.M); + if (is.fail()) + return; + a0.i = x; + read(is, std::forward(args)...); +} + +template +inline +void +checked_set(T& value, T from, T not_a_value, std::basic_ios& is) +{ + if (!is.fail()) + { + if (value == not_a_value) + value = std::move(from); + else if (value != from) + is.setstate(std::ios::failbit); + } +} + +} // namespace detail; + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + fields& fds, std::basic_string* abbrev, + std::chrono::minutes* offset) +{ + using std::numeric_limits; + using std::ios; + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::seconds; + using std::chrono::minutes; + using std::chrono::hours; + using detail::round_i; + typename std::basic_istream::sentry ok{is, true}; + if (ok) + { + date::detail::save_istream ss(is); + is.fill(' '); + is.flags(std::ios::skipws | std::ios::dec); + is.width(0); +#if !ONLY_C_LOCALE + auto& f = std::use_facet>(is.getloc()); + std::tm tm{}; +#endif + const CharT* command = nullptr; + auto modified = CharT{}; + auto width = -1; + + CONSTDATA int not_a_year = numeric_limits::min(); + CONSTDATA int not_a_2digit_year = 100; + CONSTDATA int not_a_century = not_a_year / 100; + CONSTDATA int not_a_month = 0; + CONSTDATA int not_a_day = 0; + CONSTDATA int not_a_hour = numeric_limits::min(); + CONSTDATA int not_a_hour_12_value = 0; + CONSTDATA int not_a_minute = not_a_hour; + CONSTDATA Duration not_a_second = Duration::min(); + CONSTDATA int not_a_doy = -1; + CONSTDATA int not_a_weekday = 8; + CONSTDATA int not_a_week_num = 100; + CONSTDATA int not_a_ampm = -1; + CONSTDATA minutes not_a_offset = minutes::min(); + + int Y = not_a_year; // c, F, Y * + int y = not_a_2digit_year; // D, x, y * + int g = not_a_2digit_year; // g * + int G = not_a_year; // G * + int C = 
not_a_century; // C * + int m = not_a_month; // b, B, h, m, c, D, F, x * + int d = not_a_day; // c, d, D, e, F, x * + int j = not_a_doy; // j * + int wd = not_a_weekday; // a, A, u, w * + int H = not_a_hour; // c, H, R, T, X * + int I = not_a_hour_12_value; // I, r * + int p = not_a_ampm; // p, r * + int M = not_a_minute; // c, M, r, R, T, X * + Duration s = not_a_second; // c, r, S, T, X * + int U = not_a_week_num; // U * + int V = not_a_week_num; // V * + int W = not_a_week_num; // W * + std::basic_string temp_abbrev; // Z * + minutes temp_offset = not_a_offset; // z * + + using detail::read; + using detail::rs; + using detail::ru; + using detail::rld; + using detail::checked_set; + for (; *fmt != CharT{} && !is.fail(); ++fmt) + { + switch (*fmt) + { + case 'a': + case 'A': + case 'u': + case 'w': // wd: a, A, u, w + if (command) + { + int trial_wd = not_a_weekday; + if (*fmt == 'a' || *fmt == 'A') + { + if (modified == CharT{}) + { +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + is.setstate(err); + if (!is.fail()) + trial_wd = tm.tm_wday; +#else + auto nm = detail::weekday_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (!is.fail()) + trial_wd = i % 7; +#endif + } + else + read(is, CharT{'%'}, width, modified, *fmt); + } + else // *fmt == 'u' || *fmt == 'w' + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + read(is, ru{trial_wd, 1, width == -1 ? + 1u : static_cast(width)}); + if (!is.fail()) + { + if (*fmt == 'u') + { + if (!(1 <= trial_wd && trial_wd <= 7)) + { + trial_wd = not_a_weekday; + is.setstate(ios::failbit); + } + else if (trial_wd == 7) + trial_wd = 0; + } + else // *fmt == 'w' + { + if (!(0 <= trial_wd && trial_wd <= 6)) + { + trial_wd = not_a_weekday; + is.setstate(ios::failbit); + } + } + } + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + is.setstate(err); + if (!is.fail()) + trial_wd = tm.tm_wday; + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + } + if (trial_wd != not_a_weekday) + checked_set(wd, trial_wd, not_a_weekday, is); + } + else // !command + read(is, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + break; + case 'b': + case 'B': + case 'h': + if (command) + { + if (modified == CharT{}) + { + int ttm = not_a_month; +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + ttm = tm.tm_mon + 1; + is.setstate(err); +#else + auto nm = detail::month_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + if (!is.fail()) + ttm = i % 12 + 1; +#endif + checked_set(m, ttm, not_a_month, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'c': + if (command) + { + if (modified != CharT{'O'}) + { +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + checked_set(Y, tm.tm_year + 1900, not_a_year, is); + checked_set(m, tm.tm_mon + 1, not_a_month, is); + checked_set(d, tm.tm_mday, not_a_day, is); + checked_set(H, tm.tm_hour, not_a_hour, is); + checked_set(M, tm.tm_min, not_a_minute, is); + checked_set(s, duration_cast(seconds{tm.tm_sec}), + not_a_second, is); + } + is.setstate(err); +#else + 
// "%a %b %e %T %Y" + auto nm = detail::weekday_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + checked_set(wd, static_cast(i % 7), not_a_weekday, is); + ws(is); + nm = detail::month_names(); + i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + checked_set(m, static_cast(i % 12 + 1), not_a_month, is); + ws(is); + int td = not_a_day; + read(is, rs{td, 1, 2}); + checked_set(d, td, not_a_day, is); + ws(is); + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int tH; + int tM; + long double S{}; + read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, + CharT{':'}, rld{S, 1, w}); + checked_set(H, tH, not_a_hour, is); + checked_set(M, tM, not_a_minute, is); + checked_set(s, round_i(duration{S}), + not_a_second, is); + ws(is); + int tY = not_a_year; + read(is, rs{tY, 1, 4u}); + checked_set(Y, tY, not_a_year, is); +#endif + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'x': + if (command) + { + if (modified != CharT{'O'}) + { +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + checked_set(Y, tm.tm_year + 1900, not_a_year, is); + checked_set(m, tm.tm_mon + 1, not_a_month, is); + checked_set(d, tm.tm_mday, not_a_day, is); + } + is.setstate(err); +#else + // "%m/%d/%y" + int ty = not_a_2digit_year; + int tm = not_a_month; + int td = not_a_day; + read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'}, + rs{ty, 1, 2}); + checked_set(y, ty, not_a_2digit_year, is); + checked_set(m, tm, not_a_month, is); + checked_set(d, td, not_a_day, is); +#endif + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'X': + if (command) + { + if (modified != CharT{'O'}) + { +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + checked_set(H, tm.tm_hour, not_a_hour, is); + checked_set(M, tm.tm_min, not_a_minute, is); + checked_set(s, duration_cast(seconds{tm.tm_sec}), + not_a_second, is); + } + is.setstate(err); +#else + // "%T" + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int tH = not_a_hour; + int tM = not_a_minute; + long double S{}; + read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, + CharT{':'}, rld{S, 1, w}); + checked_set(H, tH, not_a_hour, is); + checked_set(M, tM, not_a_minute, is); + checked_set(s, round_i(duration{S}), + not_a_second, is); +#endif + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'C': + if (command) + { + int tC = not_a_century; +#if !ONLY_C_LOCALE + if (modified == CharT{}) + { +#endif + read(is, rs{tC, 1, width == -1 ? 2u : static_cast(width)}); +#if !ONLY_C_LOCALE + } + else + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + auto tY = tm.tm_year + 1900; + tC = (tY >= 0 ? 
tY : tY-99) / 100; + } + is.setstate(err); + } +#endif + checked_set(C, tC, not_a_century, is); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'D': + if (command) + { + if (modified == CharT{}) + { + int tn = not_a_month; + int td = not_a_day; + int ty = not_a_2digit_year; + read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, + ru{td, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, + rs{ty, 1, 2}); + checked_set(y, ty, not_a_2digit_year, is); + checked_set(m, tn, not_a_month, is); + checked_set(d, td, not_a_day, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'F': + if (command) + { + if (modified == CharT{}) + { + int tY = not_a_year; + int tn = not_a_month; + int td = not_a_day; + read(is, rs{tY, 1, width == -1 ? 4u : static_cast(width)}, + CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2}); + checked_set(Y, tY, not_a_year, is); + checked_set(m, tn, not_a_month, is); + checked_set(d, td, not_a_day, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'd': + case 'e': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + int td = not_a_day; + read(is, rs{td, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(d, td, not_a_day, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + command = nullptr; + width = -1; + modified = CharT{}; + if ((err & ios::failbit) == 0) + checked_set(d, tm.tm_mday, not_a_day, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'H': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + int tH = not_a_hour; + read(is, ru{tH, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(H, tH, not_a_hour, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(H, tm.tm_hour, not_a_hour, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'I': + if (command) + { + if (modified == CharT{}) + { + int tI = not_a_hour_12_value; + // reads in an hour into I, but most be in [1, 12] + read(is, rs{tI, 1, width == -1 ? 2u : static_cast(width)}); + if (!(1 <= tI && tI <= 12)) + is.setstate(ios::failbit); + checked_set(I, tI, not_a_hour_12_value, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'j': + if (command) + { + if (modified == CharT{}) + { + int tj = not_a_doy; + read(is, ru{tj, 1, width == -1 ? 
3u : static_cast(width)}); + checked_set(j, tj, not_a_doy, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'M': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + int tM = not_a_minute; + read(is, ru{tM, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(M, tM, not_a_minute, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(M, tm.tm_min, not_a_minute, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'm': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + int tn = not_a_month; + read(is, rs{tn, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(m, tn, not_a_month, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(m, tm.tm_mon + 1, not_a_month, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'n': + case 't': + if (command) + { + if (modified == CharT{}) + { + // %n matches a single white space character + // %t matches 0 or 1 white space characters + auto ic = is.peek(); + if (Traits::eq_int_type(ic, Traits::eof())) + { + ios::iostate err = ios::eofbit; + if (*fmt == 'n') + err |= ios::failbit; + is.setstate(err); + break; + } + if (isspace(ic)) + { + (void)is.get(); + } + else if (*fmt == 'n') + is.setstate(ios::failbit); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'p': + if (command) + { + if (modified == CharT{}) + { + int tp = not_a_ampm; +#if !ONLY_C_LOCALE + tm = std::tm{}; + tm.tm_hour = 1; + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + is.setstate(err); + if (tm.tm_hour == 1) + tp = 0; + else if (tm.tm_hour == 13) + tp = 1; + else + is.setstate(err); +#else + auto nm = detail::ampm_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + tp = static_cast(i); +#endif + checked_set(p, tp, not_a_ampm, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + + break; + case 'r': + if (command) + { + if (modified == CharT{}) + { +#if !ONLY_C_LOCALE + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + { + checked_set(H, tm.tm_hour, not_a_hour, is); + checked_set(M, tm.tm_min, not_a_hour, is); + checked_set(s, duration_cast(seconds{tm.tm_sec}), + not_a_second, is); + } + is.setstate(err); +#else + // "%I:%M:%S %p" + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 
2 : 3 + dfs::width; + long double S{}; + int tI = not_a_hour_12_value; + int tM = not_a_minute; + read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2}, + CharT{':'}, rld{S, 1, w}); + checked_set(I, tI, not_a_hour_12_value, is); + checked_set(M, tM, not_a_minute, is); + checked_set(s, round_i(duration{S}), + not_a_second, is); + ws(is); + auto nm = detail::ampm_names(); + auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first; + checked_set(p, static_cast(i), not_a_ampm, is); +#endif + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'R': + if (command) + { + if (modified == CharT{}) + { + int tH = not_a_hour; + int tM = not_a_minute; + read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'}, + ru{tM, 1, 2}, CharT{'\0'}); + checked_set(H, tH, not_a_hour, is); + checked_set(M, tM, not_a_minute, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'S': + if (command) + { + #if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'E'}) +#endif + { + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + long double S{}; + read(is, rld{S, 1, width == -1 ? w : static_cast(width)}); + checked_set(s, round_i(duration{S}), + not_a_second, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'O'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(s, duration_cast(seconds{tm.tm_sec}), + not_a_second, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'T': + if (command) + { + if (modified == CharT{}) + { + using dfs = detail::decimal_format_seconds; + CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width; + int tH = not_a_hour; + int tM = not_a_minute; + long double S{}; + read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, + CharT{':'}, rld{S, 1, w}); + checked_set(H, tH, not_a_hour, is); + checked_set(M, tM, not_a_minute, is); + checked_set(s, round_i(duration{S}), + not_a_second, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'Y': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#else + if (modified != CharT{'O'}) +#endif + { + int tY = not_a_year; + read(is, rs{tY, 1, width == -1 ? 4u : static_cast(width)}); + checked_set(Y, tY, not_a_year, is); + } +#if !ONLY_C_LOCALE + else if (modified == CharT{'E'}) + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(Y, tm.tm_year + 1900, not_a_year, is); + is.setstate(err); + } +#endif + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'y': + if (command) + { +#if !ONLY_C_LOCALE + if (modified == CharT{}) +#endif + { + int ty = not_a_2digit_year; + read(is, ru{ty, 1, width == -1 ? 
2u : static_cast(width)}); + checked_set(y, ty, not_a_2digit_year, is); + } +#if !ONLY_C_LOCALE + else + { + ios::iostate err = ios::goodbit; + f.get(is, nullptr, is, err, &tm, command, fmt+1); + if ((err & ios::failbit) == 0) + checked_set(Y, tm.tm_year + 1900, not_a_year, is); + is.setstate(err); + } +#endif + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'g': + if (command) + { + if (modified == CharT{}) + { + int tg = not_a_2digit_year; + read(is, ru{tg, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(g, tg, not_a_2digit_year, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'G': + if (command) + { + if (modified == CharT{}) + { + int tG = not_a_year; + read(is, rs{tG, 1, width == -1 ? 4u : static_cast(width)}); + checked_set(G, tG, not_a_year, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'U': + if (command) + { + if (modified == CharT{}) + { + int tU = not_a_week_num; + read(is, ru{tU, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(U, tU, not_a_week_num, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'V': + if (command) + { + if (modified == CharT{}) + { + int tV = not_a_week_num; + read(is, ru{tV, 1, width == -1 ? 2u : static_cast(width)}); + checked_set(V, tV, not_a_week_num, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'W': + if (command) + { + if (modified == CharT{}) + { + int tW = not_a_week_num; + read(is, ru{tW, 1, width == -1 ? 
2u : static_cast(width)}); + checked_set(W, tW, not_a_week_num, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'E': + case 'O': + if (command) + { + if (modified == CharT{}) + { + modified = *fmt; + } + else + { + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + } + else + read(is, *fmt); + break; + case '%': + if (command) + { + if (modified == CharT{}) + read(is, *fmt); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + command = fmt; + break; + case 'z': + if (command) + { + int tH, tM; + minutes toff = not_a_offset; + bool neg = false; + auto ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if (c == '-') + neg = true; + } + if (modified == CharT{}) + { + read(is, rs{tH, 2, 2}); + if (!is.fail()) + toff = hours{std::abs(tH)}; + if (is.good()) + { + ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if ('0' <= c && c <= '9') + { + read(is, ru{tM, 2, 2}); + if (!is.fail()) + toff += minutes{tM}; + } + } + } + } + else + { + read(is, rs{tH, 1, 2}); + if (!is.fail()) + toff = hours{std::abs(tH)}; + if (is.good()) + { + ic = is.peek(); + if (!Traits::eq_int_type(ic, Traits::eof())) + { + auto c = static_cast(Traits::to_char_type(ic)); + if (c == ':') + { + (void)is.get(); + read(is, ru{tM, 2, 2}); + if (!is.fail()) + toff += minutes{tM}; + } + } + } + } + if (neg) + toff = -toff; + checked_set(temp_offset, toff, not_a_offset, is); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + case 'Z': + if (command) + { + if (modified == CharT{}) + { + std::basic_string buf; + while (is.rdstate() == std::ios::goodbit) + { + auto i = is.rdbuf()->sgetc(); + if (Traits::eq_int_type(i, Traits::eof())) + { + is.setstate(ios::eofbit); + break; + } + auto wc = Traits::to_char_type(i); + auto c = static_cast(wc); + // is c a valid time zone name or abbreviation character? 
+ if (!(CharT{1} < wc && wc < CharT{127}) || !(isalnum(c) || + c == '_' || c == '/' || c == '-' || c == '+')) + break; + buf.push_back(c); + is.rdbuf()->sbumpc(); + } + if (buf.empty()) + is.setstate(ios::failbit); + checked_set(temp_abbrev, buf, {}, is); + } + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + else + read(is, *fmt); + break; + default: + if (command) + { + if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9') + { + width = static_cast(*fmt) - '0'; + while ('0' <= fmt[1] && fmt[1] <= '9') + width = 10*width + static_cast(*++fmt) - '0'; + } + else + { + if (modified == CharT{}) + read(is, CharT{'%'}, width, *fmt); + else + read(is, CharT{'%'}, width, modified, *fmt); + command = nullptr; + width = -1; + modified = CharT{}; + } + } + else // !command + { + if (isspace(static_cast(*fmt))) + { + // space matches 0 or more white space characters + if (is.good()) + ws(is); + } + else + read(is, *fmt); + } + break; + } + } + // is.fail() || *fmt == CharT{} + if (is.rdstate() == ios::goodbit && command) + { + if (modified == CharT{}) + read(is, CharT{'%'}, width); + else + read(is, CharT{'%'}, width, modified); + } + if (!is.fail()) + { + if (y != not_a_2digit_year) + { + // Convert y and an optional C to Y + if (!(0 <= y && y <= 99)) + goto broken; + if (C == not_a_century) + { + if (Y == not_a_year) + { + if (y >= 69) + C = 19; + else + C = 20; + } + else + { + C = (Y >= 0 ? Y : Y-100) / 100; + } + } + int tY; + if (C >= 0) + tY = 100*C + y; + else + tY = 100*(C+1) - (y == 0 ? 100 : y); + if (Y != not_a_year && Y != tY) + goto broken; + Y = tY; + } + if (g != not_a_2digit_year) + { + // Convert g and an optional C to G + if (!(0 <= g && g <= 99)) + goto broken; + if (C == not_a_century) + { + if (G == not_a_year) + { + if (g >= 69) + C = 19; + else + C = 20; + } + else + { + C = (G >= 0 ? G : G-100) / 100; + } + } + int tG; + if (C >= 0) + tG = 100*C + g; + else + tG = 100*(C+1) - (g == 0 ? 
100 : g); + if (G != not_a_year && G != tG) + goto broken; + G = tG; + } + if (Y < static_cast(year::min()) || Y > static_cast(year::max())) + Y = not_a_year; + bool computed = false; + if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday) + { + year_month_day ymd_trial = sys_days(year{G-1}/December/Thursday[last]) + + (Monday-Thursday) + weeks{V-1} + + (weekday{static_cast(wd)}-Monday); + if (Y == not_a_year) + Y = static_cast(ymd_trial.year()); + else if (year{Y} != ymd_trial.year()) + goto broken; + if (m == not_a_month) + m = static_cast(static_cast(ymd_trial.month())); + else if (month(static_cast(m)) != ymd_trial.month()) + goto broken; + if (d == not_a_day) + d = static_cast(static_cast(ymd_trial.day())); + else if (day(static_cast(d)) != ymd_trial.day()) + goto broken; + computed = true; + } + if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday) + { + year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) + + weeks{U-1} + + (weekday{static_cast(wd)} - Sunday); + if (Y == not_a_year) + Y = static_cast(ymd_trial.year()); + else if (year{Y} != ymd_trial.year()) + goto broken; + if (m == not_a_month) + m = static_cast(static_cast(ymd_trial.month())); + else if (month(static_cast(m)) != ymd_trial.month()) + goto broken; + if (d == not_a_day) + d = static_cast(static_cast(ymd_trial.day())); + else if (day(static_cast(d)) != ymd_trial.day()) + goto broken; + computed = true; + } + if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday) + { + year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) + + weeks{W-1} + + (weekday{static_cast(wd)} - Monday); + if (Y == not_a_year) + Y = static_cast(ymd_trial.year()); + else if (year{Y} != ymd_trial.year()) + goto broken; + if (m == not_a_month) + m = static_cast(static_cast(ymd_trial.month())); + else if (month(static_cast(m)) != ymd_trial.month()) + goto broken; + if (d == not_a_day) + d = static_cast(static_cast(ymd_trial.day())); + else if (day(static_cast(d)) != ymd_trial.day()) + goto broken; + computed = true; + } + if (j != not_a_doy && Y != not_a_year) + { + auto ymd_trial = year_month_day{local_days(year{Y}/1/1) + days{j-1}}; + if (m == not_a_month) + m = static_cast(static_cast(ymd_trial.month())); + else if (month(static_cast(m)) != ymd_trial.month()) + goto broken; + if (d == not_a_day) + d = static_cast(static_cast(ymd_trial.day())); + else if (day(static_cast(d)) != ymd_trial.day()) + goto broken; + j = not_a_doy; + } + auto ymd = year{Y}/m/d; + if (ymd.ok()) + { + if (wd == not_a_weekday) + wd = static_cast((weekday(sys_days(ymd)) - Sunday).count()); + else if (wd != static_cast((weekday(sys_days(ymd)) - Sunday).count())) + goto broken; + if (!computed) + { + if (G != not_a_year || V != not_a_week_num) + { + sys_days sd = ymd; + auto G_trial = year_month_day{sd + days{3}}.year(); + auto start = sys_days((G_trial - years{1})/December/Thursday[last]) + + (Monday - Thursday); + if (sd < start) + { + --G_trial; + if (V != not_a_week_num) + start = sys_days((G_trial - years{1})/December/Thursday[last]) + + (Monday - Thursday); + } + if (G != not_a_year && G != static_cast(G_trial)) + goto broken; + if (V != not_a_week_num) + { + auto V_trial = duration_cast(sd - start).count() + 1; + if (V != V_trial) + goto broken; + } + } + if (U != not_a_week_num) + { + auto start = sys_days(Sunday[1]/January/ymd.year()); + auto U_trial = floor(sys_days(ymd) - start).count() + 1; + if (U != U_trial) + goto broken; + } + if (W != not_a_week_num) + { + auto start = 
sys_days(Monday[1]/January/ymd.year()); + auto W_trial = floor(sys_days(ymd) - start).count() + 1; + if (W != W_trial) + goto broken; + } + } + } + fds.ymd = ymd; + if (I != not_a_hour_12_value) + { + if (!(1 <= I && I <= 12)) + goto broken; + if (p != not_a_ampm) + { + // p is in [0, 1] == [AM, PM] + // Store trial H in I + if (I == 12) + --p; + I += p*12; + // Either set H from I or make sure H and I are consistent + if (H == not_a_hour) + H = I; + else if (I != H) + goto broken; + } + else // p == not_a_ampm + { + // if H, make sure H and I could be consistent + if (H != not_a_hour) + { + if (I == 12) + { + if (H != 0 && H != 12) + goto broken; + } + else if (!(I == H || I == H+12)) + { + goto broken; + } + } + else // I is ambiguous, AM or PM? + goto broken; + } + } + if (H != not_a_hour) + { + fds.has_tod = true; + fds.tod = hh_mm_ss{hours{H}}; + } + if (M != not_a_minute) + { + fds.has_tod = true; + fds.tod.m_ = minutes{M}; + } + if (s != not_a_second) + { + fds.has_tod = true; + fds.tod.s_ = detail::decimal_format_seconds{s}; + } + if (j != not_a_doy) + { + fds.has_tod = true; + fds.tod.h_ += hours{days{j}}; + } + if (wd != not_a_weekday) + fds.wd = weekday{static_cast(wd)}; + if (abbrev != nullptr) + *abbrev = std::move(temp_abbrev); + if (offset != nullptr && temp_offset != not_a_offset) + *offset = temp_offset; + } + return is; + } +broken: + is.setstate(ios::failbit); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, year& y, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.year().ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + y = fds.ymd.year(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, month& m, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.month().ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + m = fds.ymd.month(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, day& d, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.day().ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + d = fds.ymd.day(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, weekday& wd, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.wd.ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + wd = fds.wd; + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, year_month& ym, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.month().ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + ym = fds.ymd.year()/fds.ymd.month(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, month_day& md, + 
std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.month().ok() || !fds.ymd.day().ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + md = fds.ymd.month()/fds.ymd.day(); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + year_month_day& ymd, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = std::chrono::seconds; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + ymd = fds.ymd; + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + sys_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = typename std::common_type::type; + using detail::round_i; + std::chrono::minutes offset_local{}; + auto offptr = offset ? offset : &offset_local; + fields fds{}; + fds.has_tod = true; + date::from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(std::ios::failbit); + if (!is.fail()) + tp = round_i(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + local_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using CT = typename std::common_type::type; + using detail::round_i; + fields fds{}; + fds.has_tod = true; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) + is.setstate(std::ios::failbit); + if (!is.fail()) + tp = round_i(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration()); + return is; +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + std::chrono::duration& d, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using Duration = std::chrono::duration; + using CT = typename std::common_type::type; + using detail::round_i; + fields fds{}; + date::from_stream(is, fmt, fds, abbrev, offset); + if (!fds.has_tod) + is.setstate(std::ios::failbit); + if (!is.fail()) + d = round_i(fds.tod.to_duration()); + return is; +} + +template , + class Alloc = std::allocator> +struct parse_manip +{ + const std::basic_string format_; + Parsable& tp_; + std::basic_string* abbrev_; + std::chrono::minutes* offset_; + +public: + parse_manip(std::basic_string format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(std::move(format)) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} + +#if HAS_STRING_VIEW + parse_manip(const CharT* format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(format) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} + + parse_manip(std::basic_string_view format, Parsable& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) + : format_(format) + , tp_(tp) + , abbrev_(abbrev) + , offset_(offset) + {} +#endif // HAS_STRING_VIEW +}; + +template +std::basic_istream& +operator>>(std::basic_istream& is, + const parse_manip& x) +{ + return date::from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_); +} + +template +inline +auto +parse(const 
std::basic_string& format, Parsable& tp) + -> decltype(date::from_stream(std::declval&>(), + format.c_str(), tp), + parse_manip{format, tp}) +{ + return {format, tp}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::basic_string& abbrev) + -> decltype(date::from_stream(std::declval&>(), + format.c_str(), tp, &abbrev), + parse_manip{format, tp, &abbrev}) +{ + return {format, tp, &abbrev}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::chrono::minutes& offset) + -> decltype(date::from_stream(std::declval&>(), + format.c_str(), tp, + std::declval*>(), + &offset), + parse_manip{format, tp, nullptr, &offset}) +{ + return {format, tp, nullptr, &offset}; +} + +template +inline +auto +parse(const std::basic_string& format, Parsable& tp, + std::basic_string& abbrev, std::chrono::minutes& offset) + -> decltype(date::from_stream(std::declval&>(), + format.c_str(), tp, &abbrev, &offset), + parse_manip{format, tp, &abbrev, &offset}) +{ + return {format, tp, &abbrev, &offset}; +} + +// const CharT* formats + +template +inline +auto +parse(const CharT* format, Parsable& tp) + -> decltype(date::from_stream(std::declval&>(), format, tp), + parse_manip{format, tp}) +{ + return {format, tp}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, std::basic_string& abbrev) + -> decltype(date::from_stream(std::declval&>(), format, + tp, &abbrev), + parse_manip{format, tp, &abbrev}) +{ + return {format, tp, &abbrev}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset) + -> decltype(date::from_stream(std::declval&>(), format, + tp, std::declval*>(), &offset), + parse_manip{format, tp, nullptr, &offset}) +{ + return {format, tp, nullptr, &offset}; +} + +template +inline +auto +parse(const CharT* format, Parsable& tp, + std::basic_string& abbrev, std::chrono::minutes& offset) + -> decltype(date::from_stream(std::declval&>(), format, + tp, &abbrev, &offset), + parse_manip{format, tp, &abbrev, &offset}) +{ + return {format, tp, &abbrev, &offset}; +} + +// duration streaming + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, + const std::chrono::duration& d) +{ + return os << detail::make_string::from(d.count()) + + detail::get_units(typename Period::type{}); +} + +} // namespace date + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif // DATE_H diff --git a/src/3rd_party/date/ios.h b/src/3rd_party/date/ios.h new file mode 100644 index 00000000..a9f86365 --- /dev/null +++ b/src/3rd_party/date/ios.h @@ -0,0 +1,50 @@ +// +// ios.h +// DateTimeLib +// +// The MIT License (MIT) +// +// Copyright (c) 2016 Alexander Kormanovsky +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef ios_hpp +#define ios_hpp + +#if __APPLE__ +# include +# if TARGET_OS_IPHONE +# include + + namespace date + { + namespace iOSUtils + { + + std::string get_tzdata_path(); + std::string get_current_timezone(); + + } // namespace iOSUtils + } // namespace date + +# endif // TARGET_OS_IPHONE +#else // !__APPLE__ +# define TARGET_OS_IPHONE 0 +#endif // !__APPLE__ +#endif // ios_hpp diff --git a/src/3rd_party/date/tz.cpp b/src/3rd_party/date/tz.cpp new file mode 100644 index 00000000..999403e2 --- /dev/null +++ b/src/3rd_party/date/tz.cpp @@ -0,0 +1,4073 @@ +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016, 2017 Howard Hinnant +// Copyright (c) 2015 Ville Voutilainen +// Copyright (c) 2016 Alexander Kormanovsky +// Copyright (c) 2016, 2017 Jiangang Zhuang +// Copyright (c) 2017 Nicolas Veloz Savino +// Copyright (c) 2017 Florian Dang +// Copyright (c) 2017 Aaron Bishop +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. + +#ifdef _WIN32 + // windows.h will be included directly and indirectly (e.g. by curl). + // We need to define these macros to prevent windows.h bringing in + // more than we need and do it early so windows.h doesn't get included + // without these macros having been defined. + // min/max macros interfere with the C++ versions. +# ifndef NOMINMAX +# define NOMINMAX +# endif + // We don't need all that Windows has to offer. +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif + + // for wcstombs +# ifndef _CRT_SECURE_NO_WARNINGS +# define _CRT_SECURE_NO_WARNINGS +# endif + + // None of this happens with the MS SDK (at least VS14 which I tested), but: + // Compiling with mingw, we get "error: 'KF_FLAG_DEFAULT' was not declared in this scope." + // and error: 'SHGetKnownFolderPath' was not declared in this scope.". 
+ // It seems when using mingw NTDDI_VERSION is undefined and that + // causes KNOWN_FOLDER_FLAG and the KF_ flags to not get defined. + // So we must define NTDDI_VERSION to get those flags on mingw. + // The docs say though here: + // https://msdn.microsoft.com/en-nz/library/windows/desktop/aa383745(v=vs.85).aspx + // that "If you define NTDDI_VERSION, you must also define _WIN32_WINNT." + // So we declare we require Vista or greater. +# ifdef __MINGW32__ + +# ifndef NTDDI_VERSION +# define NTDDI_VERSION 0x06000000 +# define _WIN32_WINNT _WIN32_WINNT_VISTA +# elif NTDDI_VERSION < 0x06000000 +# warning "If this fails to compile NTDDI_VERSION may be to low. See comments above." +# endif + // But once we define the values above we then get this linker error: + // "tz.cpp:(.rdata$.refptr.FOLDERID_Downloads[.refptr.FOLDERID_Downloads]+0x0): " + // "undefined reference to `FOLDERID_Downloads'" + // which #include cures see: + // https://support.microsoft.com/en-us/kb/130869 +# include + // But with included, the error moves on to: + // error: 'FOLDERID_Downloads' was not declared in this scope + // Which #include cures. +# include + +# endif // __MINGW32__ + +# include +#endif // _WIN32 + +#include "date/tz_private.h" + +#ifdef __APPLE__ +# include "date/ios.h" +#else +# define TARGET_OS_IPHONE 0 +# define TARGET_OS_SIMULATOR 0 +#endif + +#if USE_OS_TZDB +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if USE_OS_TZDB +# include +#endif +#include +#include +#include +#include +#include + +// unistd.h is used on some platforms as part of the the means to get +// the current time zone. On Win32 windows.h provides a means to do it. +// gcc/mingw supports unistd.h on Win32 but MSVC does not. + +#ifdef _WIN32 +# ifdef WINAPI_FAMILY +# include +# if WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP +# define WINRT +# define INSTALL . +# endif +# endif + +# include // _unlink etc. + +# if defined(__clang__) + struct IUnknown; // fix for issue with static_cast<> in objbase.h + // (see https://github.com/philsquared/Catch/issues/690) +# endif + +# include // CoTaskFree, ShGetKnownFolderPath etc. +# if HAS_REMOTE_API +# include // _mkdir +# include // ShFileOperation etc. +# endif // HAS_REMOTE_API +#else // !_WIN32 +# include +# if !USE_OS_TZDB && !defined(INSTALL) +# include +# endif +# include +# include +# if !USE_SHELL_API +# include +# include +# include +# include +# include +# include +# endif //!USE_SHELL_API +#endif // !_WIN32 + + +#if HAS_REMOTE_API + // Note curl includes windows.h so we must include curl AFTER definitions of things + // that affect windows.h such as NOMINMAX. 
+#if defined(_MSC_VER) && defined(SHORTENED_CURL_INCLUDE) + // For rmt_curl nuget package +# include +#else +# include +#endif +#endif + +#ifdef _WIN32 +static CONSTDATA char folder_delimiter = '\\'; +#else // !_WIN32 +static CONSTDATA char folder_delimiter = '/'; +#endif // !_WIN32 + +#if defined(__GNUC__) && __GNUC__ < 5 + // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif // defined(__GNUC__) && __GNUC__ < 5 + +#if !USE_OS_TZDB + +# ifdef _WIN32 +# ifndef WINRT + +namespace +{ + struct task_mem_deleter + { + void operator()(wchar_t buf[]) + { + if (buf != nullptr) + CoTaskMemFree(buf); + } + }; + using co_task_mem_ptr = std::unique_ptr; +} + +static +std::wstring +convert_utf8_to_utf16(const std::string& s) +{ + std::wstring out; + const int size = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, NULL, 0); + + if (size == 0) + { + std::string msg = "Failed to determine required size when converting \""; + msg += s; + msg += "\" to UTF-16."; + throw std::runtime_error(msg); + } + + out.resize(size); + const int check = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, &out[0], size); + + if (size != check) + { + std::string msg = "Failed to convert \""; + msg += s; + msg += "\" to UTF-16."; + throw std::runtime_error(msg); + } + + return out; +} + +// We might need to know certain locations even if not using the remote API, +// so keep these routines out of that block for now. +static +std::string +get_known_folder(const GUID& folderid) +{ + std::string folder; + PWSTR pfolder = nullptr; + HRESULT hr = SHGetKnownFolderPath(folderid, KF_FLAG_DEFAULT, nullptr, &pfolder); + if (SUCCEEDED(hr)) + { + co_task_mem_ptr folder_ptr(pfolder); + const wchar_t* fptr = folder_ptr.get(); + auto state = std::mbstate_t(); + const auto required = std::wcsrtombs(nullptr, &fptr, 0, &state); + if (required != 0 && required != std::size_t(-1)) + { + folder.resize(required); + std::wcsrtombs(&folder[0], &fptr, folder.size(), &state); + } + } + return folder; +} + +# ifndef INSTALL + +// Usually something like "c:\Users\username\Downloads". +static +std::string +get_download_folder() +{ + return get_known_folder(FOLDERID_Downloads); +} + +# endif // !INSTALL + +# endif // WINRT +# else // !_WIN32 + +# if !defined(INSTALL) + +static +std::string +expand_path(std::string path) +{ +# if TARGET_OS_IPHONE + return date::iOSUtils::get_tzdata_path(); +# else // !TARGET_OS_IPHONE + ::wordexp_t w{}; + std::unique_ptr<::wordexp_t, void(*)(::wordexp_t*)> hold{&w, ::wordfree}; + ::wordexp(path.c_str(), &w, 0); + if (w.we_wordc != 1) + throw std::runtime_error("Cannot expand path: " + path); + path = w.we_wordv[0]; + return path; +# endif // !TARGET_OS_IPHONE +} + +static +std::string +get_download_folder() +{ + return expand_path("~/Downloads"); +} + +# endif // !defined(INSTALL) + +# endif // !_WIN32 + +/* + * This class is provided to mimic the following usage of `ifstream`: + * + * std::ifstream is(filename); + * + * file_streambuf ibuf(filename); + * std::istream is(&ibuf); + * + * This is required because `ifstream` does not support opening files + * containing wide characters on Windows. On Windows, `file_streambuf` uses + * `file_open()` to convert the file name to UTF-16 before opening it with + * `_wfopen()`. + * + * Note that this is not an exact re-implementation of `ifstream`, + * but is enough for usage here. 
+ * + * It is partially based on these two implementations: + * - fdinbuf from http://www.josuttis.com/cppcode/fdstream.html + * - stdiobuf https://stackoverflow.com/questions/12342542/convert-file-to-ifstream-c-android-ndk + * + * Apparently MSVC provides non-standard overloads of `ifstream` that support + * a `const wchar_t*` file name, but MinGW does not https://stackoverflow.com/a/822032 + */ +class file_streambuf + : public std::streambuf +{ +private: + FILE* file_; + static const int buffer_size_ = 1024; + char buffer_[buffer_size_]; + +public: + ~file_streambuf() + { + if (file_) + { + ::fclose(file_); + } + } + file_streambuf(const file_streambuf&) = delete; + file_streambuf& operator=(const file_streambuf&) = delete; + + file_streambuf(const std::string& filename) + : file_(file_open(filename)) + { + } + +protected: + virtual + int_type + underflow() + { + if (gptr() == egptr() && file_) + { + const size_t size = ::fread(buffer_, 1, buffer_size_, file_); + setg(buffer_, buffer_, buffer_ + size); + } + return (gptr() == egptr()) + ? traits_type::eof() + : traits_type::to_int_type(*gptr()); + } + +private: + FILE* + file_open(const std::string& filename) + { +# ifdef _WIN32 + std::wstring wfilename = convert_utf8_to_utf16(filename); + FILE* file = ::_wfopen(wfilename.c_str(), L"rb"); +# else // !_WIN32 + FILE* file = ::fopen(filename.c_str(), "rb"); +# endif // _WIN32 + if (file == NULL) + { + std::string msg = "Error opening file \""; + msg += filename; + msg += "\"."; + throw std::runtime_error(msg); + } + return file; + } +}; + +#endif // !USE_OS_TZDB + +namespace date +{ +// +---------------------+ +// | Begin Configuration | +// +---------------------+ + +using namespace detail; + +#if !USE_OS_TZDB + +static +std::string& +access_install() +{ + static std::string install +#ifndef INSTALL + + = get_download_folder() + folder_delimiter + "tzdata"; + +#else // !INSTALL + +# define STRINGIZEIMP(x) #x +# define STRINGIZE(x) STRINGIZEIMP(x) + + = STRINGIZE(INSTALL) + std::string(1, folder_delimiter) + "tzdata"; + + #undef STRINGIZEIMP + #undef STRINGIZE +#endif // !INSTALL + + return install; +} + +void +set_install(const std::string& install) +{ + access_install() = install; +} + +static +const std::string& +get_install() +{ + static const std::string& ref = access_install(); + return ref; +} + +#if HAS_REMOTE_API +static +std::string +get_download_gz_file(const std::string& version) +{ + auto file = get_install() + version + ".tar.gz"; + return file; +} +#endif // HAS_REMOTE_API + +#endif // !USE_OS_TZDB + +// These can be used to reduce the range of the database to save memory +CONSTDATA auto min_year = date::year::min(); +CONSTDATA auto max_year = date::year::max(); + +CONSTDATA auto min_day = date::January/1; +CONSTDATA auto max_day = date::December/31; + +#if USE_OS_TZDB + +CONSTCD14 const sys_seconds min_seconds = sys_days(min_year/min_day); + +#endif // USE_OS_TZDB + +#ifndef _WIN32 + +static +std::string +discover_tz_dir() +{ + struct stat sb; + using namespace std; +# ifndef __APPLE__ + CONSTDATA auto tz_dir_default = "/usr/share/zoneinfo"; + CONSTDATA auto tz_dir_buildroot = "/usr/share/zoneinfo/uclibc"; + + // Check special path which is valid for buildroot with uclibc builds + if(stat(tz_dir_buildroot, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_buildroot; + else if(stat(tz_dir_default, &sb) == 0 && S_ISDIR(sb.st_mode)) + return tz_dir_default; + else + throw runtime_error("discover_tz_dir failed to find zoneinfo\n"); +# else // __APPLE__ +# if TARGET_OS_IPHONE +# 
if TARGET_OS_SIMULATOR + return "/usr/share/zoneinfo"; +# else + return "/var/db/timezone/zoneinfo"; +# endif +# else + CONSTDATA auto timezone = "/etc/localtime"; + if (!(lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0)) + throw runtime_error("discover_tz_dir failed\n"); + string result; + char rp[PATH_MAX+1] = {}; + if (readlink(timezone, rp, sizeof(rp)-1) > 0) + result = string(rp); + else + throw system_error(errno, system_category(), "readlink() failed"); + auto i = result.find("zoneinfo"); + if (i == string::npos) + throw runtime_error("discover_tz_dir failed to find zoneinfo\n"); + i = result.find('/', i); + if (i == string::npos) + throw runtime_error("discover_tz_dir failed to find '/'\n"); + return result.substr(0, i); +# endif +# endif // __APPLE__ +} + +static +const std::string& +get_tz_dir() +{ + static const std::string tz_dir = discover_tz_dir(); + return tz_dir; +} + +#endif + +// +-------------------+ +// | End Configuration | +// +-------------------+ + +#ifndef _MSC_VER +static_assert(min_year <= max_year, "Configuration error"); +#endif + +static std::unique_ptr init_tzdb(); + +tzdb_list::~tzdb_list() +{ + const tzdb* ptr = head_; + head_ = nullptr; + while (ptr != nullptr) + { + auto next = ptr->next; + delete ptr; + ptr = next; + } +} + +tzdb_list::tzdb_list(tzdb_list&& x) NOEXCEPT + : head_{x.head_.exchange(nullptr)} +{ +} + +void +tzdb_list::push_front(tzdb* tzdb) NOEXCEPT +{ + tzdb->next = head_; + head_ = tzdb; +} + +tzdb_list::const_iterator +tzdb_list::erase_after(const_iterator p) NOEXCEPT +{ + auto t = p.p_->next; + p.p_->next = p.p_->next->next; + delete t; + return ++p; +} + +struct tzdb_list::undocumented_helper +{ + static void push_front(tzdb_list& db_list, tzdb* tzdb) NOEXCEPT + { + db_list.push_front(tzdb); + } +}; + +static +tzdb_list +create_tzdb() +{ + tzdb_list tz_db; + tzdb_list::undocumented_helper::push_front(tz_db, init_tzdb().release()); + return tz_db; +} + +tzdb_list& +get_tzdb_list() +{ + static tzdb_list tz_db = create_tzdb(); + return tz_db; +} + +static +std::string +parse3(std::istream& in) +{ + std::string r(3, ' '); + ws(in); + r[0] = static_cast(in.get()); + r[1] = static_cast(in.get()); + r[2] = static_cast(in.get()); + return r; +} + +static +unsigned +parse_month(std::istream& in) +{ + CONSTDATA char*const month_names[] = + {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + auto s = parse3(in); + auto m = std::find(std::begin(month_names), std::end(month_names), s) - month_names; + if (m >= std::end(month_names) - std::begin(month_names)) + throw std::runtime_error("oops: bad month name: " + s); + return static_cast(++m); +} + +#if !USE_OS_TZDB + +#ifdef _WIN32 + +static +void +sort_zone_mappings(std::vector& mappings) +{ + std::sort(mappings.begin(), mappings.end(), + [](const date::detail::timezone_mapping& lhs, + const date::detail::timezone_mapping& rhs)->bool + { + auto other_result = lhs.other.compare(rhs.other); + if (other_result < 0) + return true; + else if (other_result == 0) + { + auto territory_result = lhs.territory.compare(rhs.territory); + if (territory_result < 0) + return true; + else if (territory_result == 0) + { + if (lhs.type < rhs.type) + return true; + } + } + return false; + }); +} + +static +bool +native_to_standard_timezone_name(const std::string& native_tz_name, + std::string& standard_tz_name) +{ + // TOOD! Need be a case insensitive compare? 
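+    // A successful lookup turns a Windows ("native") name into the IANA name used
+    // by this library's database, e.g. "W. Europe Standard Time" -> "Europe/Berlin"
+    // (example mapping shown for illustration only; the actual pairs come from the
+    // windowsZones.xml mappings loaded into get_tzdb().mappings).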
+ if (native_tz_name == "UTC") + { + standard_tz_name = "Etc/UTC"; + return true; + } + standard_tz_name.clear(); + // TODO! we can improve on linear search. + const auto& mappings = date::get_tzdb().mappings; + for (const auto& tzm : mappings) + { + if (tzm.other == native_tz_name) + { + standard_tz_name = tzm.type; + return true; + } + } + return false; +} + +// Parse this XML file: +// https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml +// The parsing method is designed to be simple and quick. It is not overly +// forgiving of change but it should diagnose basic format issues. +// See timezone_mapping structure for more info. +static +std::vector +load_timezone_mappings_from_xml_file(const std::string& input_path) +{ + std::size_t line_num = 0; + std::vector mappings; + std::string line; + + file_streambuf ibuf(input_path); + std::istream is(&ibuf); + + auto error = [&input_path, &line_num](const char* info) + { + std::string msg = "Error loading time zone mapping file \""; + msg += input_path; + msg += "\" at line "; + msg += std::to_string(line_num); + msg += ": "; + msg += info; + throw std::runtime_error(msg); + }; + // [optional space]a="b" + auto read_attribute = [&line, &error] + (const char* name, std::string& value, std::size_t startPos) + ->std::size_t + { + value.clear(); + // Skip leading space before attribute name. + std::size_t spos = line.find_first_not_of(' ', startPos); + if (spos == std::string::npos) + spos = startPos; + // Assume everything up to next = is the attribute name + // and that an = will always delimit that. + std::size_t epos = line.find('=', spos); + if (epos == std::string::npos) + error("Expected \'=\' right after attribute name."); + std::size_t name_len = epos - spos; + // Expect the name we find matches the name we expect. + if (line.compare(spos, name_len, name) != 0) + { + std::string msg; + msg = "Expected attribute name \'"; + msg += name; + msg += "\' around position "; + msg += std::to_string(spos); + msg += " but found something else."; + error(msg.c_str()); + } + ++epos; // Skip the '=' that is after the attribute name. + spos = epos; + if (spos < line.length() && line[spos] == '\"') + ++spos; // Skip the quote that is before the attribute value. + else + { + std::string msg = "Expected '\"' to begin value of attribute \'"; + msg += name; + msg += "\'."; + error(msg.c_str()); + } + epos = line.find('\"', spos); + if (epos == std::string::npos) + { + std::string msg = "Expected '\"' to end value of attribute \'"; + msg += name; + msg += "\'."; + error(msg.c_str()); + } + // Extract everything in between the quotes. Note no escaping is done. + std::size_t value_len = epos - spos; + value.assign(line, spos, value_len); + ++epos; // Skip the quote that is after the attribute value; + return epos; + }; + + // Quick but not overly forgiving XML mapping file processing. 
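+    // For reference, the portion of windowsZones.xml this parser cares about looks
+    // roughly like the following (one example mapping shown; territory "001" marks
+    // the default mapping for a given Windows zone name):
+    //
+    //   <mapTimezones ...>
+    //     <mapZone other="Pacific Standard Time" territory="001" type="America/Los_Angeles"/>
+    //   </mapTimezones>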
+ bool mapTimezonesOpenTagFound = false; + bool mapTimezonesCloseTagFound = false; + std::size_t mapZonePos = std::string::npos; + std::size_t mapTimezonesPos = std::string::npos; + CONSTDATA char mapTimeZonesOpeningTag[] = { ""); + mapTimezonesCloseTagFound = (mapTimezonesPos != std::string::npos); + if (!mapTimezonesCloseTagFound) + { + std::size_t commentPos = line.find(" " << x.target_; +} + +// leap_second + +leap_second::leap_second(const std::string& s, detail::undocumented) +{ + using namespace date; + std::istringstream in(s); + in.exceptions(std::ios::failbit | std::ios::badbit); + std::string word; + int y; + MonthDayTime date; + in >> word >> y >> date; + date_ = date.to_time_point(year(y)); +} + +static +bool +file_exists(const std::string& filename) +{ +#ifdef _WIN32 + std::wstring wfilename = convert_utf8_to_utf16(filename); + return ::_waccess(wfilename.c_str(), 0) == 0; +#else + return ::access(filename.c_str(), F_OK) == 0; +#endif +} + +#if HAS_REMOTE_API + +// CURL tools + +namespace +{ + +struct curl_global_init_and_cleanup +{ + ~curl_global_init_and_cleanup() + { + ::curl_global_cleanup(); + } + curl_global_init_and_cleanup() + { + if (::curl_global_init(CURL_GLOBAL_DEFAULT) != 0) + throw std::runtime_error("CURL global initialization failed"); + } + curl_global_init_and_cleanup(curl_global_init_and_cleanup const&) = delete; + curl_global_init_and_cleanup& operator=(curl_global_init_and_cleanup const&) = delete; +}; + +struct curl_deleter +{ + void operator()(CURL* p) const + { + ::curl_easy_cleanup(p); + } +}; + +} // unnamed namespace + +static +std::unique_ptr +curl_init() +{ + static const curl_global_init_and_cleanup _{}; + return std::unique_ptr{::curl_easy_init()}; +} + +static +bool +download_to_string(const std::string& url, std::string& str) +{ + str.clear(); + auto curl = curl_init(); + if (!curl) + return false; + std::string version; + curl_easy_setopt(curl.get(), CURLOPT_USERAGENT, "curl"); + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb, + void* userp) -> std::size_t + { + auto& userstr = *static_cast(userp); + auto realsize = size * nmemb; + userstr.append(contents, realsize); + return realsize; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &str); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + auto res = curl_easy_perform(curl.get()); + return (res == CURLE_OK); +} + +namespace +{ + enum class download_file_options { binary, text }; +} + +static +bool +download_to_file(const std::string& url, const std::string& local_filename, + download_file_options opts, char* error_buffer) +{ + auto curl = curl_init(); + if (!curl) + return false; + curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); + curl_easy_setopt(curl.get(), CURLOPT_SSL_VERIFYPEER, false); + if (error_buffer) + curl_easy_setopt(curl.get(), CURLOPT_ERRORBUFFER, error_buffer); + curl_write_callback write_cb = [](char* contents, std::size_t size, std::size_t nmemb, + void* userp) -> std::size_t + { + auto& of = *static_cast(userp); + auto realsize = size * nmemb; + of.write(contents, static_cast(realsize)); + return realsize; + }; + curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, write_cb); + decltype(curl_easy_perform(curl.get())) res; + { + std::ofstream of(local_filename, + opts == download_file_options::binary ? 
+ std::ofstream::out | std::ofstream::binary : + std::ofstream::out); + of.exceptions(std::ios::badbit); + curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &of); + res = curl_easy_perform(curl.get()); + } + return res == CURLE_OK; +} + +std::string +remote_version() +{ + std::string version; + std::string str; + if (download_to_string("https://www.iana.org/time-zones", str)) + { + CONSTDATA char db[] = "/time-zones/releases/tzdata"; + CONSTDATA auto db_size = sizeof(db) - 1; + auto p = str.find(db, 0, db_size); + const int ver_str_len = 5; + if (p != std::string::npos && p + (db_size + ver_str_len) <= str.size()) + version = str.substr(p + db_size, ver_str_len); + } + return version; +} + + +// TODO! Using system() create a process and a console window. +// This is useful to see what errors may occur but is slow and distracting. +// Consider implementing this functionality more directly, such as +// using _mkdir and CreateProcess etc. +// But use the current means now as matches Unix implementations and while +// in proof of concept / testing phase. +// TODO! Use eventually. +static +bool +remove_folder_and_subfolders(const std::string& folder) +{ +# ifdef _WIN32 +# if USE_SHELL_API + // Delete the folder contents by deleting the folder. + std::string cmd = "rd /s /q \""; + cmd += folder; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + // Create a buffer containing the path to delete. It must be terminated + // by two nuls. Who designs these API's... + std::vector from; + from.assign(folder.begin(), folder.end()); + from.push_back('\0'); + from.push_back('\0'); + SHFILEOPSTRUCT fo{}; // Zero initialize. + fo.wFunc = FO_DELETE; + fo.pFrom = from.data(); + fo.fFlags = FOF_NO_UI; + int ret = SHFileOperation(&fo); + if (ret == 0 && !fo.fAnyOperationsAborted) + return true; + return false; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("rm -R " + folder).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + struct dir_deleter { + dir_deleter() {} + void operator()(DIR* d) const + { + if (d != nullptr) + { + int result = closedir(d); + assert(result == 0); + } + } + }; + using closedir_ptr = std::unique_ptr; + + std::string filename; + struct stat statbuf; + std::size_t folder_len = folder.length(); + struct dirent* p = nullptr; + + closedir_ptr d(opendir(folder.c_str())); + bool r = d.get() != nullptr; + while (r && (p=readdir(d.get())) != nullptr) + { + if (strcmp(p->d_name, ".") == 0 || strcmp(p->d_name, "..") == 0) + continue; + + // + 2 for path delimiter and nul terminator. + std::size_t buf_len = folder_len + strlen(p->d_name) + 2; + filename.resize(buf_len); + std::size_t path_len = static_cast( + snprintf(&filename[0], buf_len, "%s/%s", folder.c_str(), p->d_name)); + assert(path_len == buf_len - 1); + filename.resize(path_len); + + if (stat(filename.c_str(), &statbuf) == 0) + r = S_ISDIR(statbuf.st_mode) + ? remove_folder_and_subfolders(filename) + : unlink(filename.c_str()) == 0; + } + d.reset(); + + if (r) + r = rmdir(folder.c_str()) == 0; + + return r; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +static +bool +make_directory(const std::string& folder) +{ +# ifdef _WIN32 +# if USE_SHELL_API + // Re-create the folder. 
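+        // Builds a command such as:  mkdir "C:\Users\SomeUser\Downloads\tzdata"
+        // (illustrative path only; callers pass in the folder returned by get_install()).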
+ std::string cmd = "mkdir \""; + cmd += folder; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return _mkdir(folder.c_str()) == 0; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("mkdir -p " + folder).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return mkdir(folder.c_str(), 0777) == 0; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +static +bool +delete_file(const std::string& file) +{ +# ifdef _WIN32 +# if USE_SHELL_API + std::string cmd = "del \""; + cmd += file; + cmd += '\"'; + return std::system(cmd.c_str()) == 0; +# else // !USE_SHELL_API + return _unlink(file.c_str()) == 0; +# endif // !USE_SHELL_API +# else // !_WIN32 +# if USE_SHELL_API + return std::system(("rm " + file).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return unlink(file.c_str()) == 0; +# endif // !USE_SHELL_API +# endif // !_WIN32 +} + +# ifdef _WIN32 + +static +bool +move_file(const std::string& from, const std::string& to) +{ +# if USE_SHELL_API + std::string cmd = "move \""; + cmd += from; + cmd += "\" \""; + cmd += to; + cmd += '\"'; + return std::system(cmd.c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + return !!::MoveFile(from.c_str(), to.c_str()); +# endif // !USE_SHELL_API +} + +// Usually something like "c:\Program Files". +static +std::string +get_program_folder() +{ + return get_known_folder(FOLDERID_ProgramFiles); +} + +// Note folder can and usually does contain spaces. +static +std::string +get_unzip_program() +{ + std::string path; + + // 7-Zip appears to note its location in the registry. + // If that doesn't work, fall through and take a guess, but it will likely be wrong. + HKEY hKey = nullptr; + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\7-Zip", 0, KEY_READ, &hKey) == ERROR_SUCCESS) + { + char value_buffer[MAX_PATH + 1]; // fyi 260 at time of writing. + // in/out parameter. Documentation say that size is a count of bytes not chars. + DWORD size = sizeof(value_buffer) - sizeof(value_buffer[0]); + DWORD tzi_type = REG_SZ; + // Testing shows Path key value is "C:\Program Files\7-Zip\" i.e. always with trailing \. + bool got_value = (RegQueryValueExA(hKey, "Path", nullptr, &tzi_type, + reinterpret_cast(value_buffer), &size) == ERROR_SUCCESS); + RegCloseKey(hKey); // Close now incase of throw later. + if (got_value) + { + // Function does not guarantee to null terminate. + value_buffer[size / sizeof(value_buffer[0])] = '\0'; + path = value_buffer; + if (!path.empty()) + { + path += "7z.exe"; + return path; + } + } + } + path += get_program_folder(); + path += folder_delimiter; + path += "7-Zip\\7z.exe"; + return path; +} + +# if !USE_SHELL_API +static +int +run_program(const std::string& command) +{ + STARTUPINFO si{}; + si.cb = sizeof(si); + PROCESS_INFORMATION pi{}; + + // Allegedly CreateProcess overwrites the command line. Ugh. + std::string mutable_command(command); + if (CreateProcess(nullptr, &mutable_command[0], + nullptr, nullptr, FALSE, CREATE_NO_WINDOW, nullptr, nullptr, &si, &pi)) + { + WaitForSingleObject(pi.hProcess, INFINITE); + DWORD exit_code; + bool got_exit_code = !!GetExitCodeProcess(pi.hProcess, &exit_code); + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + // Not 100% sure about this still active thing is correct, + // but I'm going with it because I *think* WaitForSingleObject might + // return in some cases without INFINITE-ly waiting. + // But why/wouldn't GetExitCodeProcess return false in that case? 
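+        // (GetExitCodeProcess also succeeds for a process that has not yet exited,
+        // in which case it reports the sentinel STILL_ACTIVE (259); the extra check
+        // below avoids treating that sentinel as a real exit code.)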
+ if (got_exit_code && exit_code != STILL_ACTIVE) + return static_cast(exit_code); + } + return EXIT_FAILURE; +} +# endif // !USE_SHELL_API + +static +std::string +get_download_tar_file(const std::string& version) +{ + auto file = get_install(); + file += folder_delimiter; + file += "tzdata"; + file += version; + file += ".tar"; + return file; +} + +static +bool +extract_gz_file(const std::string& version, const std::string& gz_file, + const std::string& dest_folder) +{ + auto unzip_prog = get_unzip_program(); + bool unzip_result = false; + // Use the unzip program to extract the tar file from the archive. + + // Aim to create a string like: + // "C:\Program Files\7-Zip\7z.exe" x "C:\Users\SomeUser\Downloads\tzdata2016d.tar.gz" + // -o"C:\Users\SomeUser\Downloads\tzdata" + std::string cmd; + cmd = '\"'; + cmd += unzip_prog; + cmd += "\" x \""; + cmd += gz_file; + cmd += "\" -o\""; + cmd += dest_folder; + cmd += '\"'; + +# if USE_SHELL_API + // When using shelling out with std::system() extra quotes are required around the + // whole command. It's weird but necessary it seems, see: + // http://stackoverflow.com/q/27975969/576911 + + cmd = "\"" + cmd + "\""; + if (std::system(cmd.c_str()) == EXIT_SUCCESS) + unzip_result = true; +# else // !USE_SHELL_API + if (run_program(cmd) == EXIT_SUCCESS) + unzip_result = true; +# endif // !USE_SHELL_API + if (unzip_result) + delete_file(gz_file); + + // Use the unzip program extract the data from the tar file that was + // just extracted from the archive. + auto tar_file = get_download_tar_file(version); + cmd = '\"'; + cmd += unzip_prog; + cmd += "\" x \""; + cmd += tar_file; + cmd += "\" -o\""; + cmd += get_install(); + cmd += '\"'; +# if USE_SHELL_API + cmd = "\"" + cmd + "\""; + if (std::system(cmd.c_str()) == EXIT_SUCCESS) + unzip_result = true; +# else // !USE_SHELL_API + if (run_program(cmd) == EXIT_SUCCESS) + unzip_result = true; +# endif // !USE_SHELL_API + + if (unzip_result) + delete_file(tar_file); + + return unzip_result; +} + +static +std::string +get_download_mapping_file(const std::string& version) +{ + auto file = get_install() + version + "windowsZones.xml"; + return file; +} + +# else // !_WIN32 + +# if !USE_SHELL_API +static +int +run_program(const char* prog, const char*const args[]) +{ + pid_t pid = fork(); + if (pid == -1) // Child failed to start. + return EXIT_FAILURE; + + if (pid != 0) + { + // We are in the parent. Child started. Wait for it. + pid_t ret; + int status; + while ((ret = waitpid(pid, &status, 0)) == -1) + { + if (errno != EINTR) + break; + } + if (ret != -1) + { + if (WIFEXITED(status)) + return WEXITSTATUS(status); + } + printf("Child issues!\n"); + + return EXIT_FAILURE; // Not sure what status of child is. + } + else // We are in the child process. Start the program the parent wants to run. + { + + if (execv(prog, const_cast(args)) == -1) // Does not return. + { + perror("unreachable 0\n"); + _Exit(127); + } + printf("unreachable 2\n"); + } + printf("unreachable 2\n"); + // Unreachable. 
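+    // (The _Exit(127) above follows the usual shell convention of exit status 127
+    // for a command that could not be executed.)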
+ assert(false); + exit(EXIT_FAILURE); + return EXIT_FAILURE; +} +# endif // !USE_SHELL_API + +static +bool +extract_gz_file(const std::string&, const std::string& gz_file, const std::string&) +{ +# if USE_SHELL_API + bool unzipped = std::system(("tar -xzf " + gz_file + " -C " + get_install()).c_str()) == EXIT_SUCCESS; +# else // !USE_SHELL_API + const char prog[] = {"/usr/bin/tar"}; + const char*const args[] = + { + prog, "-xzf", gz_file.c_str(), "-C", get_install().c_str(), nullptr + }; + bool unzipped = (run_program(prog, args) == EXIT_SUCCESS); +# endif // !USE_SHELL_API + if (unzipped) + { + delete_file(gz_file); + return true; + } + return false; +} + +# endif // !_WIN32 + +bool +remote_download(const std::string& version, char* error_buffer) +{ + assert(!version.empty()); + +# ifdef _WIN32 + // Download folder should be always available for Windows +# else // !_WIN32 + // Create download folder if it does not exist on UNIX system + auto download_folder = get_install(); + if (!file_exists(download_folder)) + { + if (!make_directory(download_folder)) + return false; + } +# endif // _WIN32 + + auto url = "https://data.iana.org/time-zones/releases/tzdata" + version + + ".tar.gz"; + bool result = download_to_file(url, get_download_gz_file(version), + download_file_options::binary, error_buffer); +# ifdef _WIN32 + if (result) + { + auto mapping_file = get_download_mapping_file(version); + result = download_to_file( + "https://raw.githubusercontent.com/unicode-org/cldr/master/" + "common/supplemental/windowsZones.xml", + mapping_file, download_file_options::text, error_buffer); + } +# endif // _WIN32 + return result; +} + +bool +remote_install(const std::string& version) +{ + auto success = false; + assert(!version.empty()); + + std::string install = get_install(); + auto gz_file = get_download_gz_file(version); + if (file_exists(gz_file)) + { + if (file_exists(install)) + remove_folder_and_subfolders(install); + if (make_directory(install)) + { + if (extract_gz_file(version, gz_file, install)) + success = true; +# ifdef _WIN32 + auto mapping_file_source = get_download_mapping_file(version); + auto mapping_file_dest = get_install(); + mapping_file_dest += folder_delimiter; + mapping_file_dest += "windowsZones.xml"; + if (!move_file(mapping_file_source, mapping_file_dest)) + success = false; +# endif // _WIN32 + } + } + return success; +} + +#endif // HAS_REMOTE_API + +static +std::string +get_version(const std::string& path) +{ + std::string version; + + std::string path_version = path + "version"; + + if (file_exists(path_version)) + { + file_streambuf inbuf(path_version); + std::istream infile(&inbuf); + + infile >> version; + + if (!infile.fail()) + return version; + } + + std::string path_news = path + "NEWS"; + + if (file_exists(path_news)) + { + file_streambuf inbuf(path_news); + std::istream infile(&inbuf); + + while (infile) + { + infile >> version; + if (version == "Release") + { + infile >> version; + return version; + } + } + } + + throw std::runtime_error("Unable to get Timezone database version from " + path); +} + +static +std::unique_ptr +init_tzdb() +{ + using namespace date; + const std::string install = get_install(); + const std::string path = install + folder_delimiter; + std::string line; + bool continue_zone = false; + std::unique_ptr db(new tzdb); + +#if AUTO_DOWNLOAD + if (!file_exists(install)) + { + auto rv = remote_version(); + if (!rv.empty() && remote_download(rv)) + { + if (!remote_install(rv)) + { + std::string msg = "Timezone database version \""; + msg += 
rv; + msg += "\" did not install correctly to \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + } + if (!file_exists(install)) + { + std::string msg = "Timezone database not found at \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + db->version = get_version(path); + } + else + { + db->version = get_version(path); + auto rv = remote_version(); + if (!rv.empty() && db->version != rv) + { + if (remote_download(rv)) + { + remote_install(rv); + db->version = get_version(path); + } + } + } +#else // !AUTO_DOWNLOAD + if (!file_exists(install)) + { + std::string msg = "Timezone database not found at \""; + msg += install; + msg += "\""; + throw std::runtime_error(msg); + } + db->version = get_version(path); +#endif // !AUTO_DOWNLOAD + + CONSTDATA char*const files[] = + { + "africa", "antarctica", "asia", "australasia", "backward", "etcetera", "europe", + "pacificnew", "northamerica", "southamerica", "systemv", "leapseconds" + }; + + for (const auto& filename : files) + { + std::string file_path = path + filename; + if (!file_exists(file_path)) + { + continue; + } + file_streambuf inbuf(file_path); + std::istream infile(&inbuf); + while (infile) + { + std::getline(infile, line); + if (!line.empty() && line[0] != '#') + { + std::istringstream in(line); + std::string word; + in >> word; + if (word == "Rule") + { + db->rules.push_back(Rule(line)); + continue_zone = false; + } + else if (word == "Link") + { + db->links.push_back(time_zone_link(line)); + continue_zone = false; + } + else if (word == "Leap") + { + db->leap_seconds.push_back(leap_second(line, detail::undocumented{})); + continue_zone = false; + } + else if (word == "Zone") + { + db->zones.push_back(time_zone(line, detail::undocumented{})); + continue_zone = true; + } + else if (line[0] == '\t' && continue_zone) + { + db->zones.back().add(line); + } + else if (word.size() > 0 && word[0] == '#') + { + continue; + } + else + { + std::cerr << line << '\n'; + } + } + } + } + std::sort(db->rules.begin(), db->rules.end()); + Rule::split_overlaps(db->rules); + std::sort(db->zones.begin(), db->zones.end()); + db->zones.shrink_to_fit(); + std::sort(db->links.begin(), db->links.end()); + db->links.shrink_to_fit(); + std::sort(db->leap_seconds.begin(), db->leap_seconds.end()); + db->leap_seconds.shrink_to_fit(); + +#ifdef _WIN32 + std::string mapping_file = get_install() + folder_delimiter + "windowsZones.xml"; + db->mappings = load_timezone_mappings_from_xml_file(mapping_file); + sort_zone_mappings(db->mappings); +#endif // _WIN32 + + return db; +} + +const tzdb& +reload_tzdb() +{ +#if AUTO_DOWNLOAD + auto const& v = get_tzdb_list().front().version; + if (!v.empty() && v == remote_version()) + return get_tzdb_list().front(); +#endif // AUTO_DOWNLOAD + tzdb_list::undocumented_helper::push_front(get_tzdb_list(), init_tzdb().release()); + return get_tzdb_list().front(); +} + +#endif // !USE_OS_TZDB + +const tzdb& +get_tzdb() +{ + return get_tzdb_list().front(); +} + +const time_zone* +#if HAS_STRING_VIEW +tzdb::locate_zone(std::string_view tz_name) const +#else +tzdb::locate_zone(const std::string& tz_name) const +#endif +{ + auto zi = std::lower_bound(zones.begin(), zones.end(), tz_name, +#if HAS_STRING_VIEW + [](const time_zone& z, const std::string_view& nm) +#else + [](const time_zone& z, const std::string& nm) +#endif + { + return z.name() < nm; + }); + if (zi == zones.end() || zi->name() != tz_name) + { +#if !USE_OS_TZDB + auto li = std::lower_bound(links.begin(), links.end(), tz_name, +#if 
HAS_STRING_VIEW + [](const time_zone_link& z, const std::string_view& nm) +#else + [](const time_zone_link& z, const std::string& nm) +#endif + { + return z.name() < nm; + }); + if (li != links.end() && li->name() == tz_name) + { + zi = std::lower_bound(zones.begin(), zones.end(), li->target(), + [](const time_zone& z, const std::string& nm) + { + return z.name() < nm; + }); + if (zi != zones.end() && zi->name() == li->target()) + return &*zi; + } +#endif // !USE_OS_TZDB + throw std::runtime_error(std::string(tz_name) + " not found in timezone database"); + } + return &*zi; +} + +const time_zone* +#if HAS_STRING_VIEW +locate_zone(std::string_view tz_name) +#else +locate_zone(const std::string& tz_name) +#endif +{ + return get_tzdb().locate_zone(tz_name); +} + +#if USE_OS_TZDB + +std::ostream& +operator<<(std::ostream& os, const tzdb& db) +{ + os << "Version: " << db.version << "\n\n"; + for (const auto& x : db.zones) + os << x << '\n'; + os << '\n'; + for (const auto& x : db.leap_seconds) + os << x << '\n'; + return os; +} + +#else // !USE_OS_TZDB + +std::ostream& +operator<<(std::ostream& os, const tzdb& db) +{ + os << "Version: " << db.version << '\n'; + std::string title("--------------------------------------------" + "--------------------------------------------\n" + "Name ""Start Y ""End Y " + "Beginning ""Offset " + "Designator\n" + "--------------------------------------------" + "--------------------------------------------\n"); + int count = 0; + for (const auto& x : db.rules) + { + if (count++ % 50 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Name ""Offset " + "Rule ""Abrev ""Until\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + count = 0; + for (const auto& x : db.zones) + { + if (count++ % 10 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Alias ""To\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + count = 0; + for (const auto& x : db.links) + { + if (count++ % 45 == 0) + os << title; + os << x << '\n'; + } + os << '\n'; + title = std::string("---------------------------------------------------------" + "--------------------------------------------------------\n" + "Leap second on\n" + "---------------------------------------------------------" + "--------------------------------------------------------\n"); + os << title; + for (const auto& x : db.leap_seconds) + os << x << '\n'; + return os; +} + +#endif // !USE_OS_TZDB + +// ----------------------- + +#ifdef _WIN32 + +static +std::string +getTimeZoneKeyName() +{ + DYNAMIC_TIME_ZONE_INFORMATION dtzi{}; + auto result = GetDynamicTimeZoneInformation(&dtzi); + if (result == TIME_ZONE_ID_INVALID) + throw std::runtime_error("current_zone(): GetDynamicTimeZoneInformation()" + " reported TIME_ZONE_ID_INVALID."); + auto wlen = wcslen(dtzi.TimeZoneKeyName); + char buf[128] = {}; + assert(sizeof(buf) >= wlen+1); + wcstombs(buf, dtzi.TimeZoneKeyName, wlen); + if (strcmp(buf, "Coordinated Universal Time") == 0) + return "UTC"; + return buf; +} + +const time_zone* +tzdb::current_zone() const +{ + std::string win_tzid = getTimeZoneKeyName(); + std::string 
standard_tzid; + if (!native_to_standard_timezone_name(win_tzid, standard_tzid)) + { + std::string msg; + msg = "current_zone() failed: A mapping from the Windows Time Zone id \""; + msg += win_tzid; + msg += "\" was not found in the time zone mapping database."; + throw std::runtime_error(msg); + } + return locate_zone(standard_tzid); +} + +#else // !_WIN32 + +#if HAS_STRING_VIEW + +static +std::string_view +extract_tz_name(char const* rp) +{ + using namespace std; + string_view result = rp; + CONSTDATA string_view zoneinfo = "zoneinfo"; + size_t pos = result.rfind(zoneinfo); + if (pos == result.npos) + throw runtime_error( + "current_zone() failed to find \"zoneinfo\" in " + string(result)); + pos = result.find('/', pos); + result.remove_prefix(pos + 1); + return result; +} + +#else // !HAS_STRING_VIEW + +static +std::string +extract_tz_name(char const* rp) +{ + using namespace std; + string result = rp; + CONSTDATA char zoneinfo[] = "zoneinfo"; + size_t pos = result.rfind(zoneinfo); + if (pos == result.npos) + throw runtime_error( + "current_zone() failed to find \"zoneinfo\" in " + result); + pos = result.find('/', pos); + result.erase(0, pos + 1); + return result; +} + +#endif // HAS_STRING_VIEW + +static +bool +sniff_realpath(const char* timezone) +{ + using namespace std; + char rp[PATH_MAX+1] = {}; + if (realpath(timezone, rp) == nullptr) + throw system_error(errno, system_category(), "realpath() failed"); + auto result = extract_tz_name(rp); + return result != "posixrules"; +} + +const time_zone* +tzdb::current_zone() const +{ + // On some OS's a file called /etc/localtime may + // exist and it may be either a real file + // containing time zone details or a symlink to such a file. + // On MacOS and BSD Unix if this file is a symlink it + // might resolve to a path like this: + // "/usr/share/zoneinfo/America/Los_Angeles" + // If it does, we try to determine the current + // timezone from the remainder of the path by removing the prefix + // and hoping the rest resolves to a valid timezone. + // It may not always work though. If it doesn't then an + // exception will be thrown by local_timezone. + // The path may also take a relative form: + // "../usr/share/zoneinfo/America/Los_Angeles". + { + struct stat sb; + CONSTDATA auto timezone = "/etc/localtime"; + if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) + { + using namespace std; + static const bool use_realpath = sniff_realpath(timezone); + char rp[PATH_MAX+1] = {}; + if (use_realpath) + { + if (realpath(timezone, rp) == nullptr) + throw system_error(errno, system_category(), "realpath() failed"); + } + else + { + if (readlink(timezone, rp, sizeof(rp)-1) <= 0) + throw system_error(errno, system_category(), "readlink() failed"); + } + return locate_zone(extract_tz_name(rp)); + } + } + // On embedded systems e.g. buildroot with uclibc the timezone is linked + // into /etc/TZ which is a symlink to path like this: + // "/usr/share/zoneinfo/uclibc/America/Los_Angeles" + // If it does, we try to determine the current + // timezone from the remainder of the path by removing the prefix + // and hoping the rest resolves to valid timezone. + // It may not always work though. If it doesn't then an + // exception will be thrown by local_timezone. + // The path may also take a relative form: + // "../usr/share/zoneinfo/uclibc/America/Los_Angeles". 
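+    // For example (illustrative path only): if /etc/TZ resolves to
+    // "/usr/share/zoneinfo/uclibc/America/Los_Angeles" and get_tz_dir() yields
+    // "/usr/share/zoneinfo/uclibc", the prefix is stripped below, leaving
+    // "America/Los_Angeles", which is then handed to locate_zone().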
+ { + struct stat sb; + CONSTDATA auto timezone = "/etc/TZ"; + if (lstat(timezone, &sb) == 0 && S_ISLNK(sb.st_mode) && sb.st_size > 0) { + using namespace std; + string result; + char rp[PATH_MAX+1] = {}; + if (readlink(timezone, rp, sizeof(rp)-1) > 0) + result = string(rp); + else + throw system_error(errno, system_category(), "readlink() failed"); + + const size_t pos = result.find(get_tz_dir()); + if (pos != result.npos) + result.erase(0, get_tz_dir().size() + 1 + pos); + return locate_zone(result); + } + } + { + // On some versions of some linux distro's (e.g. Ubuntu), + // the current timezone might be in the first line of + // the /etc/timezone file. + std::ifstream timezone_file("/etc/timezone"); + if (timezone_file.is_open()) + { + std::string result; + std::getline(timezone_file, result); + if (!result.empty()) + return locate_zone(result); + } + // Fall through to try other means. + } + { + // On some versions of some bsd distro's (e.g. FreeBSD), + // the current timezone might be in the first line of + // the /var/db/zoneinfo file. + std::ifstream timezone_file("/var/db/zoneinfo"); + if (timezone_file.is_open()) + { + std::string result; + std::getline(timezone_file, result); + if (!result.empty()) + return locate_zone(result); + } + // Fall through to try other means. + } + { + // On some versions of some bsd distro's (e.g. iOS), + // it is not possible to use file based approach, + // we switch to system API, calling functions in + // CoreFoundation framework. +#if TARGET_OS_IPHONE + std::string result = date::iOSUtils::get_current_timezone(); + if (!result.empty()) + return locate_zone(result); +#endif + // Fall through to try other means. + } + { + // On some versions of some linux distro's (e.g. Red Hat), + // the current timezone might be in the first line of + // the /etc/sysconfig/clock file as: + // ZONE="US/Eastern" + std::ifstream timezone_file("/etc/sysconfig/clock"); + std::string result; + while (timezone_file) + { + std::getline(timezone_file, result); + auto p = result.find("ZONE=\""); + if (p != std::string::npos) + { + result.erase(0, p+6); + result.erase(result.rfind('"')); + return locate_zone(result); + } + } + // Fall through to try other means. + } + throw std::runtime_error("Could not get current timezone"); +} + +#endif // !_WIN32 + +const time_zone* +current_zone() +{ + return get_tzdb().current_zone(); +} + +} // namespace date + +#if defined(__GNUC__) && __GNUC__ < 5 +# pragma GCC diagnostic pop +#endif diff --git a/src/3rd_party/date/tz.h b/src/3rd_party/date/tz.h new file mode 100644 index 00000000..61dde292 --- /dev/null +++ b/src/3rd_party/date/tz.h @@ -0,0 +1,2792 @@ +#ifndef TZ_H +#define TZ_H + +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016, 2017 Howard Hinnant +// Copyright (c) 2017 Jiangang Zhuang +// Copyright (c) 2017 Aaron Bishop +// Copyright (c) 2017 Tomasz Kamiński +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. + +// Get more recent database at http://www.iana.org/time-zones + +// The notion of "current timezone" is something the operating system is expected to "just +// know". How it knows this is system specific. It's often a value set by the user at OS +// installation time and recorded by the OS somewhere. On Linux and Mac systems the current +// timezone name is obtained by looking at the name or contents of a particular file on +// disk. On Windows the current timezone name comes from the registry. In either method, +// there is no guarantee that the "native" current timezone name obtained will match any +// of the "Standard" names in this library's "database". On Linux, the names usually do +// seem to match so mapping functions to map from native to "Standard" are typically not +// required. On Windows, the names are never "Standard" so mapping is always required. +// Technically any OS may use the mapping process but currently only Windows does use it. + +#ifndef USE_OS_TZDB +# define USE_OS_TZDB 0 +#endif + +#ifndef HAS_REMOTE_API +# if USE_OS_TZDB == 0 +# ifdef _WIN32 +# define HAS_REMOTE_API 0 +# else +# define HAS_REMOTE_API 1 +# endif +# else // HAS_REMOTE_API makes no sense when using the OS timezone database +# define HAS_REMOTE_API 0 +# endif +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wconstant-logical-operand" +#endif + +static_assert(!(USE_OS_TZDB && HAS_REMOTE_API), + "USE_OS_TZDB and HAS_REMOTE_API can not be used together"); + +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +#ifndef AUTO_DOWNLOAD +# define AUTO_DOWNLOAD HAS_REMOTE_API +#endif + +static_assert(HAS_REMOTE_API == 0 ? 
AUTO_DOWNLOAD == 0 : true, + "AUTO_DOWNLOAD can not be turned on without HAS_REMOTE_API"); + +#ifndef USE_SHELL_API +# define USE_SHELL_API 1 +#endif + +#if USE_OS_TZDB +# ifdef _WIN32 +# error "USE_OS_TZDB can not be used on Windows" +# endif +#endif + +#ifndef HAS_DEDUCTION_GUIDES +# if __cplusplus >= 201703 +# define HAS_DEDUCTION_GUIDES 1 +# else +# define HAS_DEDUCTION_GUIDES 0 +# endif +#endif // HAS_DEDUCTION_GUIDES + +#include "date.h" + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#include "tz_private.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# ifdef DATE_BUILD_DLL +# define DATE_API __declspec(dllexport) +# elif defined(DATE_USE_DLL) +# define DATE_API __declspec(dllimport) +# else +# define DATE_API +# endif +#else +# ifdef DATE_BUILD_DLL +# define DATE_API __attribute__ ((visibility ("default"))) +# else +# define DATE_API +# endif +#endif + +namespace date +{ + +enum class choose {earliest, latest}; + +namespace detail +{ + struct undocumented; + + template + struct nodeduct + { + using type = T; + }; + + template + using nodeduct_t = typename nodeduct::type; +} + +struct sys_info +{ + sys_seconds begin; + sys_seconds end; + std::chrono::seconds offset; + std::chrono::minutes save; + std::string abbrev; +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const sys_info& r) +{ + os << r.begin << '\n'; + os << r.end << '\n'; + os << make_time(r.offset) << "\n"; + os << make_time(r.save) << "\n"; + os << r.abbrev << '\n'; + return os; +} + +struct local_info +{ + enum {unique, nonexistent, ambiguous} result; + sys_info first; + sys_info second; +}; + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const local_info& r) +{ + if (r.result == local_info::nonexistent) + os << "nonexistent between\n"; + else if (r.result == local_info::ambiguous) + os << "ambiguous between\n"; + os << r.first; + if (r.result != local_info::unique) + { + os << "and\n"; + os << r.second; + } + return os; +} + +class nonexistent_local_time + : public std::runtime_error +{ +public: + template + nonexistent_local_time(local_time tp, const local_info& i); + +private: + template + static + std::string + make_msg(local_time tp, const local_info& i); +}; + +template +inline +nonexistent_local_time::nonexistent_local_time(local_time tp, + const local_info& i) + : std::runtime_error(make_msg(tp, i)) +{ +} + +template +std::string +nonexistent_local_time::make_msg(local_time tp, const local_info& i) +{ + assert(i.result == local_info::nonexistent); + std::ostringstream os; + os << tp << " is in a gap between\n" + << local_seconds{i.first.end.time_since_epoch()} + i.first.offset << ' ' + << i.first.abbrev << " and\n" + << local_seconds{i.second.begin.time_since_epoch()} + i.second.offset << ' ' + << i.second.abbrev + << " which are both equivalent to\n" + << i.first.end << " UTC"; + return os.str(); +} + +class ambiguous_local_time + : public std::runtime_error +{ +public: + template + ambiguous_local_time(local_time tp, const local_info& i); + +private: + template + static + std::string + make_msg(local_time tp, const local_info& i); +}; + +template +inline +ambiguous_local_time::ambiguous_local_time(local_time tp, const local_info& i) + : std::runtime_error(make_msg(tp, i)) +{ +} + +template +std::string +ambiguous_local_time::make_msg(local_time tp, const local_info& i) +{ + assert(i.result == local_info::ambiguous); + 
std::ostringstream os; + os << tp << " is ambiguous. It could be\n" + << tp << ' ' << i.first.abbrev << " == " + << tp - i.first.offset << " UTC or\n" + << tp << ' ' << i.second.abbrev << " == " + << tp - i.second.offset << " UTC"; + return os.str(); +} + +class time_zone; + +#if HAS_STRING_VIEW +DATE_API const time_zone* locate_zone(std::string_view tz_name); +#else +DATE_API const time_zone* locate_zone(const std::string& tz_name); +#endif + +DATE_API const time_zone* current_zone(); + +template +struct zoned_traits +{ +}; + +template <> +struct zoned_traits +{ + static + const time_zone* + default_zone() + { + return date::locate_zone("Etc/UTC"); + } + +#if HAS_STRING_VIEW + + static + const time_zone* + locate_zone(std::string_view name) + { + return date::locate_zone(name); + } + +#else // !HAS_STRING_VIEW + + static + const time_zone* + locate_zone(const std::string& name) + { + return date::locate_zone(name); + } + + static + const time_zone* + locate_zone(const char* name) + { + return date::locate_zone(name); + } + +#endif // !HAS_STRING_VIEW +}; + +template +class zoned_time; + +template +bool +operator==(const zoned_time& x, + const zoned_time& y); + +template +class zoned_time +{ +public: + using duration = typename std::common_type::type; + +private: + TimeZonePtr zone_; + sys_time tp_; + +public: +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::default_zone())> +#endif + zoned_time(); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::default_zone())> +#endif + zoned_time(const sys_time& st); + explicit zoned_time(TimeZonePtr z); + +#if HAS_STRING_VIEW + template ::locate_zone(std::string_view())) + >::value + >::type> + explicit zoned_time(std::string_view name); +#else +# if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())) + >::value + >::type> +# endif + explicit zoned_time(const std::string& name); +#endif + + template , + sys_time>::value + >::type> + zoned_time(const zoned_time& zt) NOEXCEPT; + + zoned_time(TimeZonePtr z, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ()->to_sys(local_time{})), + sys_time + >::value + >::type> +#endif + zoned_time(TimeZonePtr z, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ()->to_sys(local_time{}, + choose::earliest)), + sys_time + >::value + >::type> +#endif + zoned_time(TimeZonePtr z, const local_time& tp, choose c); + + template , + sys_time>::value + >::type> + zoned_time(TimeZonePtr z, const zoned_time& zt); + + template , + sys_time>::value + >::type> + zoned_time(TimeZonePtr z, const zoned_time& zt, choose); + +#if HAS_STRING_VIEW + + template ::locate_zone(std::string_view())), + sys_time + >::value + >::type> + zoned_time(std::string_view name, detail::nodeduct_t&> st); + + template ::locate_zone(std::string_view())), + local_time + >::value + >::type> + zoned_time(std::string_view name, detail::nodeduct_t&> tp); + + template ::locate_zone(std::string_view())), + local_time, + choose + >::value + >::type> + zoned_time(std::string_view name, detail::nodeduct_t&> tp, choose c); + + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string_view())), + zoned_time + >::value + >::type> + zoned_time(std::string_view name, const zoned_time& zt); + + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string_view())), + zoned_time, + choose + >::value + >::type> + zoned_time(std::string_view 
name, const zoned_time& zt, choose); + +#else // !HAS_STRING_VIEW + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + sys_time + >::value + >::type> +#endif + zoned_time(const std::string& name, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + sys_time + >::value + >::type> +#endif + zoned_time(const char* name, const sys_time& st); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + local_time + >::value + >::type> +#endif + zoned_time(const std::string& name, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + local_time + >::value + >::type> +#endif + zoned_time(const char* name, const local_time& tp); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + local_time, + choose + >::value + >::type> +#endif + zoned_time(const std::string& name, const local_time& tp, choose c); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template ::locate_zone(std::string())), + local_time, + choose + >::value + >::type> +#endif + zoned_time(const char* name, const local_time& tp, choose c); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string())), + zoned_time + >::value + >::type> +#else + template +#endif + zoned_time(const std::string& name, const zoned_time& zt); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string())), + zoned_time + >::value + >::type> +#else + template +#endif + zoned_time(const char* name, const zoned_time& zt); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string())), + zoned_time, + choose + >::value + >::type> +#else + template +#endif + zoned_time(const std::string& name, const zoned_time& zt, + choose); + +#if !defined(_MSC_VER) || (_MSC_VER > 1916) + template , + sys_time>::value && + std::is_constructible + < + zoned_time, + decltype(zoned_traits::locate_zone(std::string())), + zoned_time, + choose + >::value + >::type> +#else + template +#endif + zoned_time(const char* name, const zoned_time& zt, + choose); + +#endif // !HAS_STRING_VIEW + + zoned_time& operator=(const sys_time& st); + zoned_time& operator=(const local_time& ut); + + explicit operator sys_time() const; + explicit operator local_time() const; + + TimeZonePtr get_time_zone() const; + local_time get_local_time() const; + sys_time get_sys_time() const; + sys_info get_info() const; + + template + friend + bool + operator==(const zoned_time& x, + const zoned_time& y); + + template + friend + std::basic_ostream& + operator<<(std::basic_ostream& os, + const zoned_time& t); + +private: + template friend class zoned_time; + + template + static + TimeZonePtr2&& + check(TimeZonePtr2&& p); +}; + +using zoned_seconds = zoned_time; + +#if HAS_DEDUCTION_GUIDES + +namespace detail +{ + template + using time_zone_representation = + std::conditional_t + < + std::is_convertible::value, + time_zone const*, + std::remove_cv_t> + >; +} + +zoned_time() + -> zoned_time; + +template +zoned_time(sys_time) + -> zoned_time>; + +template +zoned_time(TimeZonePtrOrName&&) + -> zoned_time>; + +template +zoned_time(TimeZonePtrOrName&&, sys_time) + 
-> zoned_time, detail::time_zone_representation>; + +template +zoned_time(TimeZonePtrOrName&&, local_time, choose = choose::earliest) + -> zoned_time, detail::time_zone_representation>; + +template +zoned_time(TimeZonePtrOrName&&, zoned_time, choose = choose::earliest) + -> zoned_time, detail::time_zone_representation>; + +#endif // HAS_DEDUCTION_GUIDES + +template +inline +bool +operator==(const zoned_time& x, + const zoned_time& y) +{ + return x.zone_ == y.zone_ && x.tp_ == y.tp_; +} + +template +inline +bool +operator!=(const zoned_time& x, + const zoned_time& y) +{ + return !(x == y); +} + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + +namespace detail +{ +# if USE_OS_TZDB + struct transition; + struct expanded_ttinfo; +# else // !USE_OS_TZDB + struct zonelet; + class Rule; +# endif // !USE_OS_TZDB +} + +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + +class time_zone +{ +private: + std::string name_; +#if USE_OS_TZDB + std::vector transitions_; + std::vector ttinfos_; +#else // !USE_OS_TZDB + std::vector zonelets_; +#endif // !USE_OS_TZDB + std::unique_ptr adjusted_; + +public: +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + time_zone(time_zone&&) = default; + time_zone& operator=(time_zone&&) = default; +#else // defined(_MSC_VER) && (_MSC_VER < 1900) + time_zone(time_zone&& src); + time_zone& operator=(time_zone&& src); +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + + DATE_API explicit time_zone(const std::string& s, detail::undocumented); + + const std::string& name() const NOEXCEPT; + + template sys_info get_info(sys_time st) const; + template local_info get_info(local_time tp) const; + + template + sys_time::type> + to_sys(local_time tp) const; + + template + sys_time::type> + to_sys(local_time tp, choose z) const; + + template + local_time::type> + to_local(sys_time tp) const; + + friend bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT; + friend bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT; + friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone& z); + +#if !USE_OS_TZDB + DATE_API void add(const std::string& s); +#endif // !USE_OS_TZDB + +private: + DATE_API sys_info get_info_impl(sys_seconds tp) const; + DATE_API local_info get_info_impl(local_seconds tp) const; + + template + sys_time::type> + to_sys_impl(local_time tp, choose z, std::false_type) const; + template + sys_time::type> + to_sys_impl(local_time tp, choose, std::true_type) const; + +#if USE_OS_TZDB + DATE_API void init() const; + DATE_API void init_impl(); + DATE_API sys_info + load_sys_info(std::vector::const_iterator i) const; + + template + DATE_API void + load_data(std::istream& inf, std::int32_t tzh_leapcnt, std::int32_t tzh_timecnt, + std::int32_t tzh_typecnt, std::int32_t tzh_charcnt); +#else // !USE_OS_TZDB + DATE_API sys_info get_info_impl(sys_seconds tp, int tz_int) const; + DATE_API void adjust_infos(const std::vector& rules); + DATE_API void parse_info(std::istream& in); +#endif // !USE_OS_TZDB +}; + +#if defined(_MSC_VER) && (_MSC_VER < 1900) + +inline +time_zone::time_zone(time_zone&& src) + : name_(std::move(src.name_)) + , zonelets_(std::move(src.zonelets_)) + , adjusted_(std::move(src.adjusted_)) + {} + +inline +time_zone& +time_zone::operator=(time_zone&& src) +{ + name_ = std::move(src.name_); + zonelets_ = std::move(src.zonelets_); + adjusted_ = std::move(src.adjusted_); + return *this; +} + +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + +inline +const std::string& +time_zone::name() const NOEXCEPT +{ + return name_; +} + 
+template +inline +sys_info +time_zone::get_info(sys_time st) const +{ + return get_info_impl(date::floor(st)); +} + +template +inline +local_info +time_zone::get_info(local_time tp) const +{ + return get_info_impl(date::floor(tp)); +} + +template +inline +sys_time::type> +time_zone::to_sys(local_time tp) const +{ + return to_sys_impl(tp, choose{}, std::true_type{}); +} + +template +inline +sys_time::type> +time_zone::to_sys(local_time tp, choose z) const +{ + return to_sys_impl(tp, z, std::false_type{}); +} + +template +inline +local_time::type> +time_zone::to_local(sys_time tp) const +{ + using LT = local_time::type>; + auto i = get_info(tp); + return LT{(tp + i.offset).time_since_epoch()}; +} + +inline bool operator==(const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ == y.name_;} +inline bool operator< (const time_zone& x, const time_zone& y) NOEXCEPT {return x.name_ < y.name_;} + +inline bool operator!=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x == y);} +inline bool operator> (const time_zone& x, const time_zone& y) NOEXCEPT {return y < x;} +inline bool operator<=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(y < x);} +inline bool operator>=(const time_zone& x, const time_zone& y) NOEXCEPT {return !(x < y);} + +template +sys_time::type> +time_zone::to_sys_impl(local_time tp, choose z, std::false_type) const +{ + auto i = get_info(tp); + if (i.result == local_info::nonexistent) + { + return i.first.end; + } + else if (i.result == local_info::ambiguous) + { + if (z == choose::latest) + return sys_time{tp.time_since_epoch()} - i.second.offset; + } + return sys_time{tp.time_since_epoch()} - i.first.offset; +} + +template +sys_time::type> +time_zone::to_sys_impl(local_time tp, choose, std::true_type) const +{ + auto i = get_info(tp); + if (i.result == local_info::nonexistent) + throw nonexistent_local_time(tp, i); + else if (i.result == local_info::ambiguous) + throw ambiguous_local_time(tp, i); + return sys_time{tp.time_since_epoch()} - i.first.offset; +} + +#if !USE_OS_TZDB + +class time_zone_link +{ +private: + std::string name_; + std::string target_; +public: + DATE_API explicit time_zone_link(const std::string& s); + + const std::string& name() const {return name_;} + const std::string& target() const {return target_;} + + friend bool operator==(const time_zone_link& x, const time_zone_link& y) {return x.name_ == y.name_;} + friend bool operator< (const time_zone_link& x, const time_zone_link& y) {return x.name_ < y.name_;} + + friend DATE_API std::ostream& operator<<(std::ostream& os, const time_zone_link& x); +}; + +using link = time_zone_link; + +inline bool operator!=(const time_zone_link& x, const time_zone_link& y) {return !(x == y);} +inline bool operator> (const time_zone_link& x, const time_zone_link& y) {return y < x;} +inline bool operator<=(const time_zone_link& x, const time_zone_link& y) {return !(y < x);} +inline bool operator>=(const time_zone_link& x, const time_zone_link& y) {return !(x < y);} + +#endif // !USE_OS_TZDB + +class leap_second +{ +private: + sys_seconds date_; + +public: +#if USE_OS_TZDB + DATE_API explicit leap_second(const sys_seconds& s, detail::undocumented); +#else + DATE_API explicit leap_second(const std::string& s, detail::undocumented); +#endif + + sys_seconds date() const {return date_;} + + friend bool operator==(const leap_second& x, const leap_second& y) {return x.date_ == y.date_;} + friend bool operator< (const leap_second& x, const leap_second& y) {return x.date_ < y.date_;} + + template + 
friend + bool + operator==(const leap_second& x, const sys_time& y) + { + return x.date_ == y; + } + + template + friend + bool + operator< (const leap_second& x, const sys_time& y) + { + return x.date_ < y; + } + + template + friend + bool + operator< (const sys_time& x, const leap_second& y) + { + return x < y.date_; + } + + friend DATE_API std::ostream& operator<<(std::ostream& os, const leap_second& x); +}; + +inline bool operator!=(const leap_second& x, const leap_second& y) {return !(x == y);} +inline bool operator> (const leap_second& x, const leap_second& y) {return y < x;} +inline bool operator<=(const leap_second& x, const leap_second& y) {return !(y < x);} +inline bool operator>=(const leap_second& x, const leap_second& y) {return !(x < y);} + +template +inline +bool +operator==(const sys_time& x, const leap_second& y) +{ + return y == x; +} + +template +inline +bool +operator!=(const leap_second& x, const sys_time& y) +{ + return !(x == y); +} + +template +inline +bool +operator!=(const sys_time& x, const leap_second& y) +{ + return !(x == y); +} + +template +inline +bool +operator> (const leap_second& x, const sys_time& y) +{ + return y < x; +} + +template +inline +bool +operator> (const sys_time& x, const leap_second& y) +{ + return y < x; +} + +template +inline +bool +operator<=(const leap_second& x, const sys_time& y) +{ + return !(y < x); +} + +template +inline +bool +operator<=(const sys_time& x, const leap_second& y) +{ + return !(y < x); +} + +template +inline +bool +operator>=(const leap_second& x, const sys_time& y) +{ + return !(x < y); +} + +template +inline +bool +operator>=(const sys_time& x, const leap_second& y) +{ + return !(x < y); +} + +using leap = leap_second; + +#ifdef _WIN32 + +namespace detail +{ + +// The time zone mapping is modelled after this data file: +// http://unicode.org/repos/cldr/trunk/common/supplemental/windowsZones.xml +// and the field names match the element names from the mapZone element +// of windowsZones.xml. +// The website displays this file here: +// http://www.unicode.org/cldr/charts/latest/supplemental/zone_tzid.html +// The html view is sorted before being displayed but is otherwise the same +// There is a mapping between the os centric view (in this case windows) +// the html displays uses and the generic view the xml file. +// That mapping is this: +// display column "windows" -> xml field "other". +// display column "region" -> xml field "territory". +// display column "tzid" -> xml field "type". +// This structure uses the generic terminology because it could be +// used to to support other os/native name conversions, not just windows, +// and using the same generic names helps retain the connection to the +// origin of the data that we are using. 
+struct timezone_mapping +{ + timezone_mapping(const char* other, const char* territory, const char* type) + : other(other), territory(territory), type(type) + { + } + timezone_mapping() = default; + std::string other; + std::string territory; + std::string type; +}; + +} // detail + +#endif // _WIN32 + +struct tzdb +{ + std::string version = "unknown"; + std::vector zones; +#if !USE_OS_TZDB + std::vector links; +#endif + std::vector leap_seconds; +#if !USE_OS_TZDB + std::vector rules; +#endif +#ifdef _WIN32 + std::vector mappings; +#endif + tzdb* next = nullptr; + + tzdb() = default; +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + tzdb(tzdb&&) = default; + tzdb& operator=(tzdb&&) = default; +#else // defined(_MSC_VER) && (_MSC_VER < 1900) + tzdb(tzdb&& src) + : version(std::move(src.version)) + , zones(std::move(src.zones)) + , links(std::move(src.links)) + , leap_seconds(std::move(src.leap_seconds)) + , rules(std::move(src.rules)) + , mappings(std::move(src.mappings)) + {} + + tzdb& operator=(tzdb&& src) + { + version = std::move(src.version); + zones = std::move(src.zones); + links = std::move(src.links); + leap_seconds = std::move(src.leap_seconds); + rules = std::move(src.rules); + mappings = std::move(src.mappings); + return *this; + } +#endif // defined(_MSC_VER) && (_MSC_VER < 1900) + +#if HAS_STRING_VIEW + const time_zone* locate_zone(std::string_view tz_name) const; +#else + const time_zone* locate_zone(const std::string& tz_name) const; +#endif + const time_zone* current_zone() const; +}; + +using TZ_DB = tzdb; + +DATE_API std::ostream& +operator<<(std::ostream& os, const tzdb& db); + +DATE_API const tzdb& get_tzdb(); + +class tzdb_list +{ + std::atomic head_{nullptr}; + +public: + ~tzdb_list(); + tzdb_list() = default; + tzdb_list(tzdb_list&& x) NOEXCEPT; + + const tzdb& front() const NOEXCEPT {return *head_;} + tzdb& front() NOEXCEPT {return *head_;} + + class const_iterator; + + const_iterator begin() const NOEXCEPT; + const_iterator end() const NOEXCEPT; + + const_iterator cbegin() const NOEXCEPT; + const_iterator cend() const NOEXCEPT; + + const_iterator erase_after(const_iterator p) NOEXCEPT; + + struct undocumented_helper; +private: + void push_front(tzdb* tzdb) NOEXCEPT; +}; + +class tzdb_list::const_iterator +{ + tzdb* p_ = nullptr; + + explicit const_iterator(tzdb* p) NOEXCEPT : p_{p} {} +public: + const_iterator() = default; + + using iterator_category = std::forward_iterator_tag; + using value_type = tzdb; + using reference = const value_type&; + using pointer = const value_type*; + using difference_type = std::ptrdiff_t; + + reference operator*() const NOEXCEPT {return *p_;} + pointer operator->() const NOEXCEPT {return p_;} + + const_iterator& operator++() NOEXCEPT {p_ = p_->next; return *this;} + const_iterator operator++(int) NOEXCEPT {auto t = *this; ++(*this); return t;} + + friend + bool + operator==(const const_iterator& x, const const_iterator& y) NOEXCEPT + {return x.p_ == y.p_;} + + friend + bool + operator!=(const const_iterator& x, const const_iterator& y) NOEXCEPT + {return !(x == y);} + + friend class tzdb_list; +}; + +inline +tzdb_list::const_iterator +tzdb_list::begin() const NOEXCEPT +{ + return const_iterator{head_}; +} + +inline +tzdb_list::const_iterator +tzdb_list::end() const NOEXCEPT +{ + return const_iterator{nullptr}; +} + +inline +tzdb_list::const_iterator +tzdb_list::cbegin() const NOEXCEPT +{ + return begin(); +} + +inline +tzdb_list::const_iterator +tzdb_list::cend() const NOEXCEPT +{ + return end(); +} + +DATE_API tzdb_list& 
get_tzdb_list(); + +#if !USE_OS_TZDB + +DATE_API const tzdb& reload_tzdb(); +DATE_API void set_install(const std::string& install); + +#endif // !USE_OS_TZDB + +#if HAS_REMOTE_API + +DATE_API std::string remote_version(); +// if provided error_buffer size should be at least CURL_ERROR_SIZE +DATE_API bool remote_download(const std::string& version, char* error_buffer = nullptr); +DATE_API bool remote_install(const std::string& version); + +#endif + +// zoned_time + +namespace detail +{ + +template +inline +T* +to_raw_pointer(T* p) NOEXCEPT +{ + return p; +} + +template +inline +auto +to_raw_pointer(Pointer p) NOEXCEPT + -> decltype(detail::to_raw_pointer(p.operator->())) +{ + return detail::to_raw_pointer(p.operator->()); +} + +} // namespace detail + +template +template +inline +TimeZonePtr2&& +zoned_time::check(TimeZonePtr2&& p) +{ + if (detail::to_raw_pointer(p) == nullptr) + throw std::runtime_error( + "zoned_time constructed with a time zone pointer == nullptr"); + return std::forward(p); +} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time() + : zone_(check(zoned_traits::default_zone())) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const sys_time& st) + : zone_(check(zoned_traits::default_zone())) + , tp_(st) + {} + +template +inline +zoned_time::zoned_time(TimeZonePtr z) + : zone_(check(std::move(z))) + {} + +#if HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(std::string_view name) + : zoned_time(zoned_traits::locate_zone(name)) + {} + +#else // !HAS_STRING_VIEW + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const std::string& name) + : zoned_time(zoned_traits::locate_zone(name)) + {} + +#endif // !HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(const zoned_time& zt) NOEXCEPT + : zone_(zt.zone_) + , tp_(zt.tp_) + {} + +template +inline +zoned_time::zoned_time(TimeZonePtr z, const sys_time& st) + : zone_(check(std::move(z))) + , tp_(st) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(TimeZonePtr z, const local_time& t) + : zone_(check(std::move(z))) + , tp_(zone_->to_sys(t)) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(TimeZonePtr z, const local_time& t, + choose c) + : zone_(check(std::move(z))) + , tp_(zone_->to_sys(t, c)) + {} + +template +template +inline +zoned_time::zoned_time(TimeZonePtr z, + const zoned_time& zt) + : zone_(check(std::move(z))) + , tp_(zt.tp_) + {} + +template +template +inline +zoned_time::zoned_time(TimeZonePtr z, + const zoned_time& zt, choose) + : zoned_time(std::move(z), zt) + {} + +#if HAS_STRING_VIEW + +template +template +inline +zoned_time::zoned_time(std::string_view name, + detail::nodeduct_t&> st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + detail::nodeduct_t&> t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + detail::nodeduct_t&> t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +template +inline +zoned_time::zoned_time(std::string_view name, + const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +template +inline 
+zoned_time::zoned_time(std::string_view name, + const zoned_time& zt, + choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +#else // !HAS_STRING_VIEW + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const sys_time& st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const sys_time& st) + : zoned_time(zoned_traits::locate_zone(name), st) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const local_time& t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const local_time& t) + : zoned_time(zoned_traits::locate_zone(name), t) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const local_time& t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#endif +inline +zoned_time::zoned_time(const char* name, + const local_time& t, choose c) + : zoned_time(zoned_traits::locate_zone(name), t, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#else +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#else +template +#endif +inline +zoned_time::zoned_time(const char* name, + const zoned_time& zt) + : zoned_time(zoned_traits::locate_zone(name), zt) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#else +template +#endif +inline +zoned_time::zoned_time(const std::string& name, + const zoned_time& zt, + choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +template +#if !defined(_MSC_VER) || (_MSC_VER > 1916) +template +#else +template +#endif +inline +zoned_time::zoned_time(const char* name, + const zoned_time& zt, + choose c) + : zoned_time(zoned_traits::locate_zone(name), zt, c) + {} + +#endif // HAS_STRING_VIEW + +template +inline +zoned_time& +zoned_time::operator=(const sys_time& st) +{ + tp_ = st; + return *this; +} + +template +inline +zoned_time& +zoned_time::operator=(const local_time& ut) +{ + tp_ = zone_->to_sys(ut); + return *this; +} + +template +inline +zoned_time::operator local_time::duration>() const +{ + return get_local_time(); +} + +template +inline +zoned_time::operator sys_time::duration>() const +{ + return get_sys_time(); +} + +template +inline +TimeZonePtr +zoned_time::get_time_zone() const +{ + return zone_; +} + +template +inline +local_time::duration> +zoned_time::get_local_time() const +{ + return zone_->to_local(tp_); +} + +template +inline +sys_time::duration> +zoned_time::get_sys_time() const +{ + return tp_; +} + +template +inline +sys_info +zoned_time::get_info() const +{ + return zone_->get_info(tp_); +} + +// make_zoned_time + +inline +zoned_time +make_zoned() +{ + return zoned_time(); +} + +template +inline +zoned_time::type> +make_zoned(const sys_time& tp) +{ + return zoned_time::type>(tp); +} + +template 1916) +#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600) + , class = typename 
std::enable_if + < + std::is_class + < + typename std::decay + < + decltype(*detail::to_raw_pointer(std::declval())) + >::type + >{} + >::type +#endif +#endif + > +inline +zoned_time +make_zoned(TimeZonePtr z) +{ + return zoned_time(std::move(z)); +} + +inline +zoned_seconds +make_zoned(const std::string& name) +{ + return zoned_seconds(name); +} + +template 1916) +#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const local_time& tp) +{ + return zoned_time::type, + TimeZonePtr>(std::move(zone), tp); +} + +template 1916) +#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const local_time& tp, choose c) +{ + return zoned_time::type, + TimeZonePtr>(std::move(zone), tp, c); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const local_time& tp) +{ + return zoned_time::type>(name, tp); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const local_time& tp, choose c) +{ + return zoned_time::type>(name, tp, c); +} + +template +inline +zoned_time +make_zoned(TimeZonePtr zone, const zoned_time& zt) +{ + return zoned_time(std::move(zone), zt); +} + +template +inline +zoned_time +make_zoned(const std::string& name, const zoned_time& zt) +{ + return zoned_time(name, zt); +} + +template +inline +zoned_time +make_zoned(TimeZonePtr zone, const zoned_time& zt, choose c) +{ + return zoned_time(std::move(zone), zt, c); +} + +template +inline +zoned_time +make_zoned(const std::string& name, const zoned_time& zt, choose c) +{ + return zoned_time(name, zt, c); +} + +template 1916) +#if !defined(__INTEL_COMPILER) || (__INTEL_COMPILER > 1600) + , class = typename std::enable_if + < + std::is_class())>::type>{} + >::type +#endif +#endif + > +inline +zoned_time::type, TimeZonePtr> +make_zoned(TimeZonePtr zone, const sys_time& st) +{ + return zoned_time::type, + TimeZonePtr>(std::move(zone), st); +} + +template +inline +zoned_time::type> +make_zoned(const std::string& name, const sys_time& st) +{ + return zoned_time::type>(name, st); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const zoned_time& tp) +{ + using duration = typename zoned_time::duration; + using LT = local_time; + auto const st = tp.get_sys_time(); + auto const info = tp.get_time_zone()->get_info(st); + return to_stream(os, fmt, LT{(st+info.offset).time_since_epoch()}, + &info.abbrev, &info.offset); +} + +template +inline +std::basic_ostream& +operator<<(std::basic_ostream& os, const zoned_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', ' ', '%', 'Z', CharT{}}; + return to_stream(os, fmt, t); +} + +class utc_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = duration::period; + using time_point = std::chrono::time_point; + static CONSTDATA bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_sys(const std::chrono::time_point&); + + template + static + std::chrono::time_point::type> + from_sys(const std::chrono::time_point&); + + template + static + std::chrono::time_point::type> + to_local(const std::chrono::time_point&); + + template + static + 
std::chrono::time_point::type> + from_local(const std::chrono::time_point&); +}; + +template + using utc_time = std::chrono::time_point; + +using utc_seconds = utc_time; + +template +utc_time::type> +utc_clock::from_sys(const sys_time& st) +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + auto const& leaps = get_tzdb().leap_seconds; + auto const lt = std::upper_bound(leaps.begin(), leaps.end(), st); + return utc_time{st.time_since_epoch() + seconds{lt-leaps.begin()}}; +} + +// Return pair +// first is true if ut is during a leap second insertion, otherwise false. +// If ut is during a leap second insertion, that leap second is included in the count +template +std::pair +is_leap_second(date::utc_time const& ut) +{ + using std::chrono::seconds; + using duration = typename std::common_type::type; + auto const& leaps = get_tzdb().leap_seconds; + auto tp = sys_time{ut.time_since_epoch()}; + auto const lt = std::upper_bound(leaps.begin(), leaps.end(), tp); + auto ds = seconds{lt-leaps.begin()}; + tp -= ds; + auto ls = false; + if (lt > leaps.begin()) + { + if (tp < lt[-1]) + { + if (tp >= lt[-1].date() - seconds{1}) + ls = true; + else + --ds; + } + } + return {ls, ds}; +} + +struct leap_second_info +{ + bool is_leap_second; + std::chrono::seconds elapsed; +}; + +template +leap_second_info +get_leap_second_info(date::utc_time const& ut) +{ + auto p = is_leap_second(ut); + return {p.first, p.second}; +} + +template +sys_time::type> +utc_clock::to_sys(const utc_time& ut) +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + auto ls = is_leap_second(ut); + auto tp = sys_time{ut.time_since_epoch() - ls.second}; + if (ls.first) + tp = floor(tp) + seconds{1} - CD{1}; + return tp; +} + +inline +utc_clock::time_point +utc_clock::now() +{ + return from_sys(std::chrono::system_clock::now()); +} + +template +utc_time::type> +utc_clock::from_local(const local_time& st) +{ + return from_sys(sys_time{st.time_since_epoch()}); +} + +template +local_time::type> +utc_clock::to_local(const utc_time& ut) +{ + using CD = typename std::common_type::type; + return local_time{to_sys(ut).time_since_epoch()}; +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const utc_time& t) +{ + using std::chrono::seconds; + using CT = typename std::common_type::type; + const std::string abbrev("UTC"); + CONSTDATA seconds offset{0}; + auto ls = is_leap_second(t); + auto tp = sys_time{t.time_since_epoch() - ls.second}; + auto const sd = floor(tp); + year_month_day ymd = sd; + auto time = make_time(tp - sys_seconds{sd}); + time.seconds(detail::undocumented{}) += seconds{ls.first}; + fields fds{ymd, time}; + return to_stream(os, fmt, fds, &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const utc_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + utc_time& tp, std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + using std::chrono::seconds; + using std::chrono::minutes; + using CT = typename std::common_type::type; + minutes offset_local{}; + auto offptr = offset ? 
offset : &offset_local; + fields fds{}; + fds.has_tod = true; + from_stream(is, fmt, fds, abbrev, offptr); + if (!fds.ymd.ok()) + is.setstate(std::ios::failbit); + if (!is.fail()) + { + bool is_60_sec = fds.tod.seconds() == seconds{60}; + if (is_60_sec) + fds.tod.seconds(detail::undocumented{}) -= seconds{1}; + auto tmp = utc_clock::from_sys(sys_days(fds.ymd) - *offptr + fds.tod.to_duration()); + if (is_60_sec) + tmp += seconds{1}; + if (is_60_sec != is_leap_second(tmp).first || !fds.tod.in_conventional_range()) + { + is.setstate(std::ios::failbit); + return is; + } + tp = std::chrono::time_point_cast(tmp); + } + return is; +} + +// tai_clock + +class tai_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = duration::period; + using time_point = std::chrono::time_point; + static const bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + to_local(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_local(const std::chrono::time_point&) NOEXCEPT; +}; + +template + using tai_time = std::chrono::time_point; + +using tai_seconds = tai_time; + +template +inline +utc_time::type> +tai_clock::to_utc(const tai_time& t) NOEXCEPT +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + return utc_time{t.time_since_epoch()} - + (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10}); +} + +template +inline +tai_time::type> +tai_clock::from_utc(const utc_time& t) NOEXCEPT +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + return tai_time{t.time_since_epoch()} + + (sys_days(year{1970}/January/1) - sys_days(year{1958}/January/1) + seconds{10}); +} + +inline +tai_clock::time_point +tai_clock::now() +{ + return from_utc(utc_clock::now()); +} + +template +inline +local_time::type> +tai_clock::to_local(const tai_time& t) NOEXCEPT +{ + using CD = typename std::common_type::type; + return local_time{t.time_since_epoch()} - + (local_days(year{1970}/January/1) - local_days(year{1958}/January/1)); +} + +template +inline +tai_time::type> +tai_clock::from_local(const local_time& t) NOEXCEPT +{ + using CD = typename std::common_type::type; + return tai_time{t.time_since_epoch()} + + (local_days(year{1970}/January/1) - local_days(year{1958}/January/1)); +} + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const tai_time& t) +{ + const std::string abbrev("TAI"); + CONSTDATA std::chrono::seconds offset{0}; + return to_stream(os, fmt, tai_clock::to_local(t), &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const tai_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + tai_time& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + local_time lp; + from_stream(is, fmt, lp, abbrev, offset); + if (!is.fail()) + tp = tai_clock::from_local(lp); + return is; +} + +// gps_clock + +class gps_clock +{ +public: + using duration = std::chrono::system_clock::duration; + using rep = duration::rep; + using period = 
duration::period; + using time_point = std::chrono::time_point; + static const bool is_steady = false; + + static time_point now(); + + template + static + std::chrono::time_point::type> + to_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_utc(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + to_local(const std::chrono::time_point&) NOEXCEPT; + + template + static + std::chrono::time_point::type> + from_local(const std::chrono::time_point&) NOEXCEPT; +}; + +template + using gps_time = std::chrono::time_point; + +using gps_seconds = gps_time; + +template +inline +utc_time::type> +gps_clock::to_utc(const gps_time& t) NOEXCEPT +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + return utc_time{t.time_since_epoch()} + + (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) + + seconds{9}); +} + +template +inline +gps_time::type> +gps_clock::from_utc(const utc_time& t) NOEXCEPT +{ + using std::chrono::seconds; + using CD = typename std::common_type::type; + return gps_time{t.time_since_epoch()} - + (sys_days(year{1980}/January/Sunday[1]) - sys_days(year{1970}/January/1) + + seconds{9}); +} + +inline +gps_clock::time_point +gps_clock::now() +{ + return from_utc(utc_clock::now()); +} + +template +inline +local_time::type> +gps_clock::to_local(const gps_time& t) NOEXCEPT +{ + using CD = typename std::common_type::type; + return local_time{t.time_since_epoch()} + + (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1)); +} + +template +inline +gps_time::type> +gps_clock::from_local(const local_time& t) NOEXCEPT +{ + using CD = typename std::common_type::type; + return gps_time{t.time_since_epoch()} - + (local_days(year{1980}/January/Sunday[1]) - local_days(year{1970}/January/1)); +} + + +template +std::basic_ostream& +to_stream(std::basic_ostream& os, const CharT* fmt, + const gps_time& t) +{ + const std::string abbrev("GPS"); + CONSTDATA std::chrono::seconds offset{0}; + return to_stream(os, fmt, gps_clock::to_local(t), &abbrev, &offset); +} + +template +std::basic_ostream& +operator<<(std::basic_ostream& os, const gps_time& t) +{ + const CharT fmt[] = {'%', 'F', ' ', '%', 'T', CharT{}}; + return to_stream(os, fmt, t); +} + +template > +std::basic_istream& +from_stream(std::basic_istream& is, const CharT* fmt, + gps_time& tp, + std::basic_string* abbrev = nullptr, + std::chrono::minutes* offset = nullptr) +{ + local_time lp; + from_stream(is, fmt, lp, abbrev, offset); + if (!is.fail()) + tp = gps_clock::from_local(lp); + return is; +} + +// clock_time_conversion + +template +struct clock_time_conversion +{}; + +template <> +struct clock_time_conversion +{ + template + CONSTCD14 + sys_time + operator()(const sys_time& st) const + { + return st; + } +}; + +template <> +struct clock_time_conversion +{ + template + CONSTCD14 + utc_time + operator()(const utc_time& ut) const + { + return ut; + } +}; + +template<> +struct clock_time_conversion +{ + template + CONSTCD14 + local_time + operator()(const local_time& lt) const + { + return lt; + } +}; + +template <> +struct clock_time_conversion +{ + template + utc_time::type> + operator()(const sys_time& st) const + { + return utc_clock::from_sys(st); + } +}; + +template <> +struct clock_time_conversion +{ + template + sys_time::type> + operator()(const utc_time& ut) const + { + return utc_clock::to_sys(ut); + } +}; + +template<> +struct clock_time_conversion +{ + template + 
CONSTCD14 + local_time + operator()(const sys_time& st) const + { + return local_time{st.time_since_epoch()}; + } +}; + +template<> +struct clock_time_conversion +{ + template + CONSTCD14 + sys_time + operator()(const local_time& lt) const + { + return sys_time{lt.time_since_epoch()}; + } +}; + +template<> +struct clock_time_conversion +{ + template + utc_time::type> + operator()(const local_time& lt) const + { + return utc_clock::from_local(lt); + } +}; + +template<> +struct clock_time_conversion +{ + template + local_time::type> + operator()(const utc_time& ut) const + { + return utc_clock::to_local(ut); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + std::chrono::time_point + operator()(const std::chrono::time_point& tp) const + { + return tp; + } +}; + +namespace ctc_detail +{ + +template + using time_point = std::chrono::time_point; + +using std::declval; +using std::chrono::system_clock; + +//Check if TimePoint is time for given clock, +//if not emits hard error +template +struct return_clock_time +{ + using clock_time_point = time_point; + using type = TimePoint; + + static_assert(std::is_same::value, + "time point with appropariate clock shall be returned"); +}; + +// Check if Clock has to_sys method accepting TimePoint with given duration const& and +// returning sys_time. If so has nested type member equal to return type to_sys. +template +struct return_to_sys +{}; + +template +struct return_to_sys + < + Clock, Duration, + decltype(Clock::to_sys(declval const&>()), void()) + > + : return_clock_time + < + system_clock, + decltype(Clock::to_sys(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_from_sys +{}; + +template +struct return_from_sys + < + Clock, Duration, + decltype(Clock::from_sys(declval const&>()), + void()) + > + : return_clock_time + < + Clock, + decltype(Clock::from_sys(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_to_utc +{}; + +template +struct return_to_utc + < + Clock, Duration, + decltype(Clock::to_utc(declval const&>()), void()) + > + : return_clock_time + < + utc_clock, + decltype(Clock::to_utc(declval const&>()))> +{}; + +// Similiar to above +template +struct return_from_utc +{}; + +template +struct return_from_utc + < + Clock, Duration, + decltype(Clock::from_utc(declval const&>()), + void()) + > + : return_clock_time + < + Clock, + decltype(Clock::from_utc(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_to_local +{}; + +template +struct return_to_local + < + Clock, Duration, + decltype(Clock::to_local(declval const&>()), + void()) + > + : return_clock_time + < + local_t, + decltype(Clock::to_local(declval const&>())) + > +{}; + +// Similiar to above +template +struct return_from_local +{}; + +template +struct return_from_local + < + Clock, Duration, + decltype(Clock::from_local(declval const&>()), + void()) + > + : return_clock_time + < + Clock, + decltype(Clock::from_local(declval const&>())) + > +{}; + +} // namespace ctc_detail + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_to_sys::type + operator()(const std::chrono::time_point& tp) const + { + return SrcClock::to_sys(tp); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_from_sys::type + operator()(const sys_time& st) const + { + return DstClock::from_sys(st); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_to_utc::type 
+ operator()(const std::chrono::time_point& tp) const + { + return SrcClock::to_utc(tp); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_from_utc::type + operator()(const utc_time& ut) const + { + return DstClock::from_utc(ut); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_to_local::type + operator()(const std::chrono::time_point& tp) const + { + return SrcClock::to_local(tp); + } +}; + +template +struct clock_time_conversion +{ + template + CONSTCD14 + typename ctc_detail::return_from_local::type + operator()(const local_time& lt) const + { + return DstClock::from_local(lt); + } +}; + +namespace clock_cast_detail +{ + +template + using time_point = std::chrono::time_point; +using std::chrono::system_clock; + +template +CONSTCD14 +auto +conv_clock(const time_point& t) + -> decltype(std::declval>()(t)) +{ + return clock_time_conversion{}(t); +} + +//direct trait conversion, 1st candidate +template +CONSTCD14 +auto +cc_impl(const time_point& t, const time_point*) + -> decltype(conv_clock(t)) +{ + return conv_clock(t); +} + +//conversion through sys, 2nd candidate +template +CONSTCD14 +auto +cc_impl(const time_point& t, const void*) + -> decltype(conv_clock(conv_clock(t))) +{ + return conv_clock(conv_clock(t)); +} + +//conversion through utc, 2nd candidate +template +CONSTCD14 +auto +cc_impl(const time_point& t, const void*) + -> decltype(0, // MSVC_WORKAROUND + conv_clock(conv_clock(t))) +{ + return conv_clock(conv_clock(t)); +} + +//conversion through sys and utc, 3rd candidate +template +CONSTCD14 +auto +cc_impl(const time_point& t, ...) + -> decltype(conv_clock(conv_clock(conv_clock(t)))) +{ + return conv_clock(conv_clock(conv_clock(t))); +} + +//conversion through utc and sys, 3rd candidate +template +CONSTCD14 +auto +cc_impl(const time_point& t, ...) 
+ -> decltype(0, // MSVC_WORKAROUND + conv_clock(conv_clock(conv_clock(t)))) +{ + return conv_clock(conv_clock(conv_clock(t))); +} + +} // namespace clock_cast_detail + +template +CONSTCD14 +auto +clock_cast(const std::chrono::time_point& tp) + -> decltype(clock_cast_detail::cc_impl(tp, &tp)) +{ + return clock_cast_detail::cc_impl(tp, &tp); +} + +// Deprecated API + +template +inline +sys_time::type> +to_sys_time(const utc_time& t) +{ + return utc_clock::to_sys(t); +} + +template +inline +sys_time::type> +to_sys_time(const tai_time& t) +{ + return utc_clock::to_sys(tai_clock::to_utc(t)); +} + +template +inline +sys_time::type> +to_sys_time(const gps_time& t) +{ + return utc_clock::to_sys(gps_clock::to_utc(t)); +} + + +template +inline +utc_time::type> +to_utc_time(const sys_time& t) +{ + return utc_clock::from_sys(t); +} + +template +inline +utc_time::type> +to_utc_time(const tai_time& t) +{ + return tai_clock::to_utc(t); +} + +template +inline +utc_time::type> +to_utc_time(const gps_time& t) +{ + return gps_clock::to_utc(t); +} + + +template +inline +tai_time::type> +to_tai_time(const sys_time& t) +{ + return tai_clock::from_utc(utc_clock::from_sys(t)); +} + +template +inline +tai_time::type> +to_tai_time(const utc_time& t) +{ + return tai_clock::from_utc(t); +} + +template +inline +tai_time::type> +to_tai_time(const gps_time& t) +{ + return tai_clock::from_utc(gps_clock::to_utc(t)); +} + + +template +inline +gps_time::type> +to_gps_time(const sys_time& t) +{ + return gps_clock::from_utc(utc_clock::from_sys(t)); +} + +template +inline +gps_time::type> +to_gps_time(const utc_time& t) +{ + return gps_clock::from_utc(t); +} + +template +inline +gps_time::type> +to_gps_time(const tai_time& t) +{ + return gps_clock::from_utc(tai_clock::to_utc(t)); +} + +} // namespace date + +#endif // TZ_H diff --git a/src/3rd_party/date/tz_private.h b/src/3rd_party/date/tz_private.h new file mode 100644 index 00000000..aec01d04 --- /dev/null +++ b/src/3rd_party/date/tz_private.h @@ -0,0 +1,316 @@ +#ifndef TZ_PRIVATE_H +#define TZ_PRIVATE_H + +// The MIT License (MIT) +// +// Copyright (c) 2015, 2016 Howard Hinnant +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// Our apologies. When the previous paragraph was written, lowercase had not yet +// been invented (that would involve another several millennia of evolution). +// We did not mean to shout. 
+ +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) +#include "tz.h" +#else +#include "date.h" +#include +#endif + +namespace date +{ + +namespace detail +{ + +#if !USE_OS_TZDB + +enum class tz {utc, local, standard}; + +//forward declare to avoid warnings in gcc 6.2 +class MonthDayTime; +std::istream& operator>>(std::istream& is, MonthDayTime& x); +std::ostream& operator<<(std::ostream& os, const MonthDayTime& x); + + +class MonthDayTime +{ +private: + struct pair + { +#if defined(_MSC_VER) && (_MSC_VER < 1900) + pair() : month_day_(date::jan / 1), weekday_(0U) {} + + pair(const date::month_day& month_day, const date::weekday& weekday) + : month_day_(month_day), weekday_(weekday) {} +#endif + + date::month_day month_day_; + date::weekday weekday_; + }; + + enum Type {month_day, month_last_dow, lteq, gteq}; + + Type type_{month_day}; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + union U +#else + struct U +#endif + { + date::month_day month_day_; + date::month_weekday_last month_weekday_last_; + pair month_day_weekday_; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + U() : month_day_{date::jan/1} {} +#else + U() : + month_day_(date::jan/1), + month_weekday_last_(date::month(0U), date::weekday_last(date::weekday(0U))) + {} + +#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900) + + U& operator=(const date::month_day& x); + U& operator=(const date::month_weekday_last& x); + U& operator=(const pair& x); + } u; + + std::chrono::hours h_{0}; + std::chrono::minutes m_{0}; + std::chrono::seconds s_{0}; + tz zone_{tz::local}; + +public: + MonthDayTime() = default; + MonthDayTime(local_seconds tp, tz timezone); + MonthDayTime(const date::month_day& md, tz timezone); + + date::day day() const; + date::month month() const; + tz zone() const {return zone_;} + + void canonicalize(date::year y); + + sys_seconds + to_sys(date::year y, std::chrono::seconds offset, std::chrono::seconds save) const; + sys_days to_sys_days(date::year y) const; + + sys_seconds to_time_point(date::year y) const; + int compare(date::year y, const MonthDayTime& x, date::year yx, + std::chrono::seconds offset, std::chrono::minutes prev_save) const; + + friend std::istream& operator>>(std::istream& is, MonthDayTime& x); + friend std::ostream& operator<<(std::ostream& os, const MonthDayTime& x); +}; + +// A Rule specifies one or more set of datetimes without using an offset. +// Multiple dates are specified with multiple years. The years in effect +// go from starting_year_ to ending_year_, inclusive. starting_year_ <= +// ending_year_. save_ is in effect for times from the specified time +// onward, including the specified time. When the specified time is +// local, it uses the save_ from the chronologically previous Rule, or if +// there is none, 0. 
+ +//forward declare to avoid warnings in gcc 6.2 +class Rule; +bool operator==(const Rule& x, const Rule& y); +bool operator<(const Rule& x, const Rule& y); +bool operator==(const Rule& x, const date::year& y); +bool operator<(const Rule& x, const date::year& y); +bool operator==(const date::year& x, const Rule& y); +bool operator<(const date::year& x, const Rule& y); +bool operator==(const Rule& x, const std::string& y); +bool operator<(const Rule& x, const std::string& y); +bool operator==(const std::string& x, const Rule& y); +bool operator<(const std::string& x, const Rule& y); +std::ostream& operator<<(std::ostream& os, const Rule& r); + +class Rule +{ +private: + std::string name_; + date::year starting_year_{0}; + date::year ending_year_{0}; + MonthDayTime starting_at_; + std::chrono::minutes save_{0}; + std::string abbrev_; + +public: + Rule() = default; + explicit Rule(const std::string& s); + Rule(const Rule& r, date::year starting_year, date::year ending_year); + + const std::string& name() const {return name_;} + const std::string& abbrev() const {return abbrev_;} + + const MonthDayTime& mdt() const {return starting_at_;} + const date::year& starting_year() const {return starting_year_;} + const date::year& ending_year() const {return ending_year_;} + const std::chrono::minutes& save() const {return save_;} + + static void split_overlaps(std::vector& rules); + + friend bool operator==(const Rule& x, const Rule& y); + friend bool operator<(const Rule& x, const Rule& y); + friend bool operator==(const Rule& x, const date::year& y); + friend bool operator<(const Rule& x, const date::year& y); + friend bool operator==(const date::year& x, const Rule& y); + friend bool operator<(const date::year& x, const Rule& y); + friend bool operator==(const Rule& x, const std::string& y); + friend bool operator<(const Rule& x, const std::string& y); + friend bool operator==(const std::string& x, const Rule& y); + friend bool operator<(const std::string& x, const Rule& y); + + friend std::ostream& operator<<(std::ostream& os, const Rule& r); + +private: + date::day day() const; + date::month month() const; + static void split_overlaps(std::vector& rules, std::size_t i, std::size_t& e); + static bool overlaps(const Rule& x, const Rule& y); + static void split(std::vector& rules, std::size_t i, std::size_t k, + std::size_t& e); +}; + +inline bool operator!=(const Rule& x, const Rule& y) {return !(x == y);} +inline bool operator> (const Rule& x, const Rule& y) {return y < x;} +inline bool operator<=(const Rule& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const Rule& x, const Rule& y) {return !(x < y);} + +inline bool operator!=(const Rule& x, const date::year& y) {return !(x == y);} +inline bool operator> (const Rule& x, const date::year& y) {return y < x;} +inline bool operator<=(const Rule& x, const date::year& y) {return !(y < x);} +inline bool operator>=(const Rule& x, const date::year& y) {return !(x < y);} + +inline bool operator!=(const date::year& x, const Rule& y) {return !(x == y);} +inline bool operator> (const date::year& x, const Rule& y) {return y < x;} +inline bool operator<=(const date::year& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const date::year& x, const Rule& y) {return !(x < y);} + +inline bool operator!=(const Rule& x, const std::string& y) {return !(x == y);} +inline bool operator> (const Rule& x, const std::string& y) {return y < x;} +inline bool operator<=(const Rule& x, const std::string& y) {return !(y < x);} +inline bool 
operator>=(const Rule& x, const std::string& y) {return !(x < y);} + +inline bool operator!=(const std::string& x, const Rule& y) {return !(x == y);} +inline bool operator> (const std::string& x, const Rule& y) {return y < x;} +inline bool operator<=(const std::string& x, const Rule& y) {return !(y < x);} +inline bool operator>=(const std::string& x, const Rule& y) {return !(x < y);} + +struct zonelet +{ + enum tag {has_rule, has_save, is_empty}; + + std::chrono::seconds gmtoff_; + tag tag_ = has_rule; + +#if !defined(_MSC_VER) || (_MSC_VER >= 1900) + union U +#else + struct U +#endif + { + std::string rule_; + std::chrono::minutes save_; + + ~U() {} + U() {} + U(const U&) {} + U& operator=(const U&) = delete; + } u; + + std::string format_; + date::year until_year_{0}; + MonthDayTime until_date_; + sys_seconds until_utc_; + local_seconds until_std_; + local_seconds until_loc_; + std::chrono::minutes initial_save_{0}; + std::string initial_abbrev_; + std::pair first_rule_{nullptr, date::year::min()}; + std::pair last_rule_{nullptr, date::year::max()}; + + ~zonelet(); + zonelet(); + zonelet(const zonelet& i); + zonelet& operator=(const zonelet&) = delete; +}; + +#else // USE_OS_TZDB + +struct ttinfo +{ + std::int32_t tt_gmtoff; + unsigned char tt_isdst; + unsigned char tt_abbrind; + unsigned char pad[2]; +}; + +static_assert(sizeof(ttinfo) == 8, ""); + +struct expanded_ttinfo +{ + std::chrono::seconds offset; + std::string abbrev; + bool is_dst; +}; + +struct transition +{ + sys_seconds timepoint; + const expanded_ttinfo* info; + + transition(sys_seconds tp, const expanded_ttinfo* i = nullptr) + : timepoint(tp) + , info(i) + {} + + friend + std::ostream& + operator<<(std::ostream& os, const transition& t) + { + using date::operator<<; + os << t.timepoint << "Z "; + if (t.info->offset >= std::chrono::seconds{0}) + os << '+'; + os << make_time(t.info->offset); + if (t.info->is_dst > 0) + os << " daylight "; + else + os << " standard "; + os << t.info->abbrev; + return os; + } +}; + +#endif // USE_OS_TZDB + +} // namespace detail + +} // namespace date + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#include "tz.h" +#endif + +#endif // TZ_PRIVATE_H diff --git a/src/3rd_party/fast_log/.clang-format b/src/3rd_party/fast_log/.clang-format new file mode 100644 index 00000000..3b4540c7 --- /dev/null +++ b/src/3rd_party/fast_log/.clang-format @@ -0,0 +1,4 @@ +BasedOnStyle: LLVM +IndentWidth: 4 +Cpp11BracedListStyle: false +ColumnLimit: 100 diff --git a/src/3rd_party/fast_log/LICENSE b/src/3rd_party/fast_log/LICENSE new file mode 100644 index 00000000..7b493b33 --- /dev/null +++ b/src/3rd_party/fast_log/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 Nadav Rotem + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/3rd_party/fast_log/README.md b/src/3rd_party/fast_log/README.md
new file mode 100644
index 00000000..edeecb5d
--- /dev/null
+++ b/src/3rd_party/fast_log/README.md
@@ -0,0 +1,224 @@
+# Logarithmic and Exponential Function Approximation
+
+This repository contains a fast and *readable* approximation of the
+logarithmic and exponential functions. It should be easy to follow the C++ and
+Python code in the 'exp' and 'log' subdirectories.
+
+I was always curious about the implementation of these functions, and I finally
+found an excuse to learn how they work when I found that log was one of the
+hottest functions in a program that I was trying to optimize. I decided to write
+a fast approximation function and share my findings here.
+
+### Definition
+
+Power is the multiplication of some base by itself $n$ times: $x^n$. The exponential (exp)
+raises the constant $e$ to some power: $e^n$, and the logarithm (log) is
+the computation of the number $n$ for base $e$: $log_e(x)$. Log is valid only for
+positive numbers, because $e^n$ is always positive. Log is defined for
+base $e$, but it is possible to compute logs for different bases. Example:
+```
+  >>> exp(2)
+  7.38905609893065
+  >>> log(exp(2))
+  2.0
+
+  >>> log(256)/log(2)
+  8.0
+```
+
+### Problem
+
+The libc implementation is accurate up to 1
+[ULP](https://en.wikipedia.org/wiki/Unit_in_the_last_place). In many cases this
+level of accuracy is more than most programs actually need.
+We are going to write faster implementations that are less
+accurate. Accuracy is not the only requirement. Some mathematical functions
+need to be monotonic, because you want the ordering of $f(x)$ and $f(x+ε)$ to
+be consistent.
+
+### How?
+
+There are several tricks that we can use to approximate the logarithmic and
+exponential functions: fitting a polynomial, reducing the range, using lookup
+tables, recursive reduced-precision helper functions, and a few others. Let's
+use these tools to approximate $log$ and $exp$.
+
+The first step is to figure out how to approximate a region in the target
+function. To do that we'll use a polynomial.
+
+We need to find coefficients that construct the polynomial that approximates
+the $exp$ or $log$ function. We want to create a curve of the form
+$f(x) = ... C2 * X^2 + C1 * X + C0$ to approximate a *segment*.
+Polynomials are not shaped like $log$ or $exp$, so we can only hope to fit a segment.
+
+There are two tools that we could use. The first tool is part of Scipy.
+Scipy has a least-squares fitting [technique](https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm)
+that can fit a polynomial in a few lines of Python.
+
+This is how we fit a polynomial with scipy:
+```
+  import numpy as np
+  from scipy.optimize import curve_fit
+  from math import log
+
+  # Generate the training data:
+  x = np.arange(1, 2, 0.001)
+  y = np.log(np.arange(1, 2, 0.001))
+
+  # The polynomial to fit:
+  def func1(x, a, b, c, d): return a * x**3 + b*x**2 + c*x + d
+
+  params, _ = curve_fit(func1, x, y)
+  a, b, c, d = params[0], params[1], params[2], params[3]
+```
+
+The second solution is to use minimax polynomials with Sollya. Sollya is a
+floating-point development toolkit that library designers use. Minimax
+polynomials minimize the maximum error and in theory should generate excellent
+results. This is a short Sollya script to find the polynomial coefficients:
+
+``` display = decimal; Q = fpminimax(log(x), 4, [|D...|], [0.5, 1]); Q; ```
+
+![Error](error.png "Error")
+
+We've constructed a polynomial that approximates a function segment. Let's
+evaluate how well it works. This is straightforward: just subtract the real
+function from the approximation function. As you can see, the maximum error for
+$log$ is around 0.002 for all values in the range $[0.5 .. 1]$.
+
+
+### Range reduction
+
+The polynomial approximation is accurate within a segment, but the error
+increases rapidly outside the target range. The next step is to figure out how to
+evaluate the entire range of the function. We use different tricks for each of
+the functions.
+
+#### Exp
+
+One of the identities of the exponential function is $e^{a+b} = e^a * e^b$.
+This allows us to split the input $x$ into two parts: an integral part, and a
+fraction. For example, we would split the number 3.1415 into 3 and 0.1415. We can check that
+$e^{3.1415} = e^3 * e^{0.1415}$.
+
+This makes things easier for us because we can evaluate the fraction using our
+polynomial, and we can evaluate the integral part using a small lookup table.
+The highest input that we can compute without overflowing double-precision floats
+is about 710, so this is the size of our lookup table.
+
+One way to handle negative numbers is to rely on the identity $e^{-x} = 1/{e^x}$,
+but branch mispredictions make this inefficient. So, instead we'll just extend the
+lookup table to the range $[-710, 710]$.
+
+And that's it. This is our implementation:
+
+```
+  double expExp(double x) {
+    double integer = trunc(x);
+    // X is now the fractional part of the number.
+    x = x - integer;
+
+    // Use a 4-part polynomial to approximate exp(x);
+    double c[] = {0.28033708, 0.425302, 1.01273643, 1.00020947};
+
+    // Use Horner's method to evaluate the polynomial.
+    double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0])));
+    return val * EXP_TABLE[(unsigned)integer + table_zero_idx];
+  }
+```
+
+
+#### Log
+
+Just like $exp$, we need to find a trick to collapse the range of the function.
+Log is only valid for positive numbers, so we don't have to worry about
+negative inputs. Consider the identity $\log_b(xy) = \log_b x +\log_b y$. If y is
+constant then we can turn this into $log(x/c1) - c2$. Let's use this trick to collapse
+a larger space into the approximated segment.
+
+This is an example of adjusting the range of the log input, in base-2.
+```
+  >>> log(32, 2)
+  5.0
+  >>> log(16, 2) + 1
+  5.0
+  >>> log(8, 2) + 2
+  5.0
+  >>> log(4, 2) + 3
+  5.0
+  >>> log(2, 2) + 4
+  5.0
+```
+
+We could write this code:
+
+```
+  // Bring down large values.
+  while (x > 2) {
+    x /= 2;
+    shift -= 1;
+  }
+
+```
+However, there is a better way. Again, we will split the input into integral
+and fraction components, but this time we are going to rely on the underlying
+representation of the floating-point number, which already stores the number as a
+fraction times a power of two.
+The [exponent](https://en.wikipedia.org/wiki/IEEE_754) bits of the float/double are what we need to figure out the log bias.
+
+The function $frexp$ does that, except that it's too slow because it requires
+going through the ELF PLT/GOT indirection, which has [significant overhead](https://github.com/nadavrot/memset_benchmark) for small functions. But
+luckily we can implement it in a few lines of code.
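+
+The `my_frexp` helper used in the snippet below is not spelled out in this README. As a rough,
+illustrative sketch (not the repository's actual helper), a frexp-style bit extraction for
+positive, normal IEEE-754 doubles could look like the following; zeros, denormals, infinities and
+NaNs are ignored, and the real helper may normalize the mantissa to a different interval:
+
+```
+  #include <cstdint>
+  #include <cstring>
+  #include <utility>
+
+  // Returns {m, e} such that x = m * 2^e, with m in [1, 2) for normal positive x.
+  std::pair<double, int> my_frexp_sketch(double x) {
+    std::uint64_t bits;
+    std::memcpy(&bits, &x, sizeof(bits));              // view the double's raw bits
+    int e = int((bits >> 52) & 0x7ff) - 1023;          // unbias the 11-bit exponent field
+    bits &= ~(std::uint64_t(0x7ff) << 52);             // clear the exponent field ...
+    bits |= std::uint64_t(1023) << 52;                 // ... and set it to the bias, so m lands in [1, 2)
+    double m;
+    std::memcpy(&m, &bits, sizeof(m));
+    return { m, e };
+  }
+```
+
+In the snippet below, the first element of that pair feeds the fitted polynomial and the second
+becomes `pow2`.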
+
+And that's it. This is the code:
+
+```
+  double fastLog(double x) {
+    /// Extract the fraction, and the power-of-two exponent.
+    auto a = my_frexp(x);
+    x = a.first;
+    int pow2 = a.second;
+
+    // Use a 4-part polynomial to approximate log2(x);
+    double c[] = {1.33755322, -4.42852392, 6.30371424, -3.21430967};
+    double log2 = 0.6931471805599453;
+
+    // Use Horner's method to evaluate the polynomial.
+    double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0])));
+
+    // Compute log2(x), and convert the result to base-e.
+    return log2 * (pow2 + val);
+  }
+
+```
+
+### Faster Approximation
+
+Libc implementations usually use [5-term](https://github.com/rutgers-apl/The-RLIBM-Project/blob/main/libm/logf.c) to [7-term](https://github.com/Arquivotheca/SunOS-4.1.3/blob/2e8a93c3946e57cdcb7f39f2ab5ec270b3a51638/usr.lib/libm/C/log.c)
+polynomials for calculating $log$ and $exp$. It is possible to use fewer coefficients to get faster and less accurate implementations.
+
+The C++ implementation uses Horner's method for fast evaluation of small polynomials. Notice that we can rewrite our polynomial this way: $a x^4 + b x^3 + c x^2 + dx + e = e + x(d + x(c + x(b + ax)))$.
+
+Horner's representation requires fewer multiplications, and each pair of addition and multiplication can be converted by the compiler to a fused multiply-add instruction (fma).
+
+Make sure to compile the code with:
+```
+
+  clang++ bench.cc -mavx2 -mfma -O3 -fno-builtin ; ./a.out
+```
+
+Benchmark results:
+
+```
+EXP:
+name = nop, sum = , time = 163ms
+name = fast_exp, sum = 1.10837e+11, time = 166ms
+name = libm_exp, sum = 1.10829e+11, time = 383ms
+
+LOG:
+name = nop , sum = , time = 165ms
+name = fast_log, sum = 1.46016e+08, time = 167ms
+name = libm_log, sum = 1.46044e+08, time = 418ms
+
+```
+
diff --git a/src/3rd_party/fast_log/approx.png b/src/3rd_party/fast_log/approx.png
new file mode 100644
index 00000000..af07f50c
Binary files /dev/null and b/src/3rd_party/fast_log/approx.png differ
diff --git a/src/3rd_party/fast_log/chart.png b/src/3rd_party/fast_log/chart.png
new file mode 100644
index 00000000..8fa03bfe
Binary files /dev/null and b/src/3rd_party/fast_log/chart.png differ
diff --git a/src/3rd_party/fast_log/error.png b/src/3rd_party/fast_log/error.png
new file mode 100644
index 00000000..b4e15fca
Binary files /dev/null and b/src/3rd_party/fast_log/error.png differ
diff --git a/src/3rd_party/fast_log/src/Makefile b/src/3rd_party/fast_log/src/Makefile
new file mode 100644
index 00000000..6de6998a
--- /dev/null
+++ b/src/3rd_party/fast_log/src/Makefile
@@ -0,0 +1,17 @@
+
+all: exp_approx log_approx log_accurate exp_accurate
+
+exp_approx: exp_approx.cc util.h
+	g++ exp_approx.cc -O3 -g -Wall -march=native -mfma -o exp_approx
+
+log_approx: log_approx.cc util.h
+	g++ log_approx.cc -O3 -g -Wall -march=native -mfma -o log_approx
+
+log_accurate: log_accurate.cc util.h
+	g++ log_accurate.cc -O3 -g -Wall -march=native -mfma -o log_accurate
+
+exp_accurate: exp_accurate.cc util.h
+	g++ exp_accurate.cc -O3 -g -Wall -march=native -mfma -o exp_accurate
+
+clean:
+	rm -f ./exp_approx ./log_approx ./log_accurate ./exp_accurate
diff --git a/src/3rd_party/fast_log/src/exp.py b/src/3rd_party/fast_log/src/exp.py
new file mode 100644
index 00000000..643475e8
--- /dev/null
+++ b/src/3rd_party/fast_log/src/exp.py
@@ -0,0 +1,57 @@
+import numpy as np
+from scipy.optimize import curve_fit
+import matplotlib.pyplot as plt
+from math import exp
+
+# Generate the training data:
+start = 0
+end = 1
+start_poly = start - 0.1
+end_poly = end + 0.1
+x
= np.arange(start_poly, end_poly, 0.001) +y = np.exp(np.arange(start_poly, end_poly, 0.001)) + +# The polynomial to fit: +def func1(x, a, b, c, d): + return a * x**3 + b*x**2 + c*x + d + +# The sollya expression: +# display = decimal; Q = fpminimax(exp(x), 3, [|D...|], [0, 1]); Q; +def func2(x, a, b, c, d): + return 0.99967771959938100945208816483500413596630096435547 +\ + x * (1.01217403745740819331899729149881750345230102539062 +\ + x * (0.43418272190290696510572843180852942168712615966797 +\ + x * 0.27137130054267810663759519229643046855926513671875)) + +params, _ = curve_fit(func1, x, y) +a, b, c, d = params[0], params[1], params[2], params[3] +yfit1 = func1(x, a, b, c, d) +yerr1 = yfit1 - y +yfit2 = func2(x, a, b, c, d) +yerr2 = yfit2 - y + +print("The polynomial parameters:", params) + +fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(5, 3)) +p0 = axes[0] +p1 = axes[1] +min_fit = min(min(yerr1), min(yfit2)) +max_fit = min(max(yfit1), max(yfit2)) +p0.plot(x, y, 'o', label="log") +p0.plot(x, yfit1, label="curve_fit", color="red") +p0.plot(x, yfit2, label="minimax", color="gold") +p0.vlines(x = start, ymin = min_fit, ymax = max_fit, colors = 'purple') +p0.vlines(x = end, ymin = min_fit, ymax = max_fit, colors = 'purple') +p0.legend(loc='best', fancybox=True) +p0.grid(True) + +min_err = min(min(yerr1), min(yerr2)) +max_err = min(max(yerr1), max(yerr2)) +p1.plot(x, yerr1, label="curve_fit error", color="red") +p1.plot(x, yerr2, label="minimax error", color="gold") +p1.legend(loc='best', fancybox=True) +p1.vlines(x = start, ymin = min_err, ymax = max_err, colors = 'purple') +p1.vlines(x = end, ymin = min_err, ymax = max_err, colors = 'purple') +p1.grid(True) +fig.tight_layout() +plt.show() diff --git a/src/3rd_party/fast_log/src/exp.sollya b/src/3rd_party/fast_log/src/exp.sollya new file mode 100644 index 00000000..a116e35e --- /dev/null +++ b/src/3rd_party/fast_log/src/exp.sollya @@ -0,0 +1,2 @@ +display = decimal; Q = fpminimax(exp(x), 6, [|D...|], [-0.000001, 1.001]); Q; +display = decimal; Q = fpminimax(exp(x), 5, [|D...|], [-0.0039, 0.0039]); Q; diff --git a/src/3rd_party/fast_log/src/exp_accurate.cc b/src/3rd_party/fast_log/src/exp_accurate.cc new file mode 100644 index 00000000..1fbb127c --- /dev/null +++ b/src/3rd_party/fast_log/src/exp_accurate.cc @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "exp_table.h" +#include "util.h" + +/// @return True if \p x is a NAN. +bool is_nan(float x) { + unsigned xb = bit_cast(x); + xb >>= 23; + return (xb & 0xff) == 0xff; +} + +// Approximate the function \p exp in the range -0.004, 0.004. +// Q = fpminimax(exp(x), 5, [|D...|], [-0.0039, 0.0039]) +double approximate_exp_pol_around_zero(float x) { + return 1 + + x * (1 + x * (0.49999999999985944576508245518198236823081970214844 + + x * (0.166666666666697105281258473041816614568233489990234 + + x * (4.1666696240209417922972789938285131938755512237549e-2 + + x * 8.3333337622652735310335714302709675393998622894287e-3)))); +} + +float __attribute__((noinline)) my_exp(float x) { + if (x >= 710) { + return bit_cast(0x7f800000); // Inf + } else if (x <= -710) { + return 0; + } else if (is_nan(x)) { + return x; + } + + // Split X into 3 numbers such that: x = I1 + (I2 << 8) + xt; + int Int1 = int(x); + x = x - Int1; + int Int2 = int(x * 256); + x = x - (float(Int2) / 256); + + return approximate_exp_pol_around_zero(x) * EXP_TABLE[Int1 + 710] * EXP_TABLE_r256[Int2 + 256]; +} + +// Wrap the standard exp(double) and use it as the ground truth. 
+float accurate_exp(float x) { return exp((double)x); } + +// Wrap the standard exp(double) and use it as the ground truth. +float libc_exp(float x) { return expf(x); } + +int main(int argc, char **argv) { print_ulp_deltas(my_exp, accurate_exp); } diff --git a/src/3rd_party/fast_log/src/exp_approx.cc b/src/3rd_party/fast_log/src/exp_approx.cc new file mode 100644 index 00000000..bd25c4e1 --- /dev/null +++ b/src/3rd_party/fast_log/src/exp_approx.cc @@ -0,0 +1,32 @@ +#include +#include +#include +#include +#include + +#include "exp_table.h" +#include "util.h" + +double __attribute__((noinline)) nop(double x) { return x + 1; } + +double __attribute__((noinline)) fast_exp(double x) { + double integer = trunc(x); + // X is now the fractional part of the number. + x = x - integer; + + // Use a 4-part polynomial to approximate exp(x); + double c[] = { 0.28033708, 0.425302, 1.01273643, 1.00020947 }; + + // Use Horner's method to evaluate the polynomial. + double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0]))); + return val * EXP_TABLE[(unsigned)integer + 710]; +} + +int main(int argc, char **argv) { + std::vector iv = generate_test_vector(-10., 10., 10000); + bench("nop", nop, iv); + bench("trunc", trunc, iv); + bench("fast_exp", fast_exp, iv); + bench("libm_exp", exp, iv); + return 0; +} diff --git a/src/3rd_party/fast_log/src/exp_table.h b/src/3rd_party/fast_log/src/exp_table.h new file mode 100644 index 00000000..531478ba --- /dev/null +++ b/src/3rd_party/fast_log/src/exp_table.h @@ -0,0 +1,999 @@ +#ifndef EXP_TABLE_H +#define EXP_TABLE_H + +// Generated with: +// >>> from math import exp +// >>> [exp(i) for i in range(-710, 710)] +static double EXP_TABLE[1420] = { + 4.47628622567513e-309, 1.216780750623423e-308, 3.307553003638408e-308, + 8.99086122645542e-308, 2.443969469407077e-307, 6.643397797997952e-307, + 1.8058627513522668e-306, 4.9088439016919216e-306, 1.334362117671115e-305, + 3.6271722970495225e-305, 9.85967654375977e-305, 2.680137958338607e-304, + 7.285370309915161e-304, 1.9803689727037426e-303, 5.38320099214469e-303, + 1.4633057435889614e-302, 3.977677412277625e-302, 1.0812448229266266e-301, + 2.9391281542768673e-301, 7.98937865328318e-301, 2.171738281389827e-300, + 5.9033967064708435e-300, 1.604709599338467e-299, 4.36205294383555e-299, + 1.185728925200446e-298, 3.223145390850647e-298, 8.76141754643084e-298, + 2.3816002108005187e-297, 6.473860575673281e-297, 1.7597777562830093e-296, + 4.783571897030535e-296, 1.3003096562825466e-295, 3.5346081100426732e-295, + 9.608060996252968e-295, 2.6117417612840555e-294, 7.09945017032607e-294, + 1.929830639004783e-293, 5.245823558010209e-293, 1.4259626853041525e-292, + 3.8761684555229417e-292, 1.0536518276694175e-291, 2.864122616676439e-291, + 7.785492463390136e-291, 2.1163162688838255e-290, 5.752744056979149e-290, + 1.5637579633862188e-289, 4.250734855980884e-289, 1.1554695316610313e-288, + 3.140891831252265e-288, 8.537829190048485e-288, 2.3208225941796005e-287, + 6.30864988483559e-287, 1.714868834405883e-286, 4.661496790756256e-286, + 1.2671262019732887e-285, 3.444406129188316e-285, 9.362866590805559e-285, + 2.5450910116073043e-284, 6.918274648626584e-284, 1.880582026165053e-283, + 5.111951948651156e-283, 1.3895726089974245e-282, 3.7772499723621244e-282, + 1.0267629961419394e-281, 2.791031194546799e-281, 7.586809378798905e-281, + 2.0623086070371722e-280, 5.605936011183831e-280, 1.5238513990705191e-279, + 4.142257567365285e-279, 1.1259823474166023e-278, 3.06073735414821e-278, + 8.319946731466896e-278, 2.261596001389369e-277, 
6.14765531389236e-277, + 1.6711059727383288e-276, 4.542536999123976e-276, 1.2347895779821586e-275, + 3.3565060717995146e-275, 9.123929462085072e-275, 2.4801411660927963e-274, + 6.741722663803275e-274, 1.8325902209526951e-273, 4.981496696627458e-273, + 1.3541111948971181e-272, 3.6808558548018004e-272, 1.000560358328482e-271, + 2.7198050403207837e-271, 7.393196618055307e-271, 2.009679202108461e-270, + 5.4628744561235025e-270, 1.4849632365233607e-269, 4.036548581771182e-269, + 1.0972476659520735e-268, 2.982628391676622e-268, 8.107624558140589e-268, + 2.2038808508361863e-267, 5.990769268916865e-267, 1.6284599242187592e-266, + 4.426613020377647e-266, 1.2032781734912767e-265, 3.270849193582728e-265, + 8.891089926545851e-265, 2.4168488182524857e-264, 6.569676224788449e-264, + 1.785823150070186e-263, 4.8543706176772776e-263, 1.3195547438637656e-262, + 3.5869216819018034e-262, 9.750264028019429e-262, 2.6503965530043108e-261, + 7.204524788242109e-261, 1.9583928814561274e-260, 5.3234637826457406e-260, + 1.4470674864825768e-259, 3.933537253059494e-259, 1.069246283655833e-258, + 2.906512743009017e-258, 7.900720773506065e-258, 2.147638571035043e-257, + 5.837886901742308e-257, 1.586902188160519e-256, 4.3136473816186357e-256, + 1.172570929183388e-255, 3.187378249378541e-255, 8.66419237571129e-255, + 2.3551716693169407e-254, 6.402020351605795e-254, 1.740249558719502e-253, + 4.730488752451095e-253, 1.285880161551771e-252, 3.4953846767221607e-252, + 9.501440650208043e-252, 2.582759346364262e-251, 7.020667798504735e-251, + 1.908415370032299e-250, 5.1876108215107433e-250, 1.4101388229230154e-249, + 3.8331547379562597e-249, 1.0419594869858195e-248, 2.832339539464062e-248, + 7.69909710215122e-248, 2.0928315748319355e-247, 5.688906039890977e-247, + 1.5464049912046552e-246, 4.2035645870299835e-246, 1.1426473231677555e-245, + 3.10603745490428e-245, 8.443085172179486e-245, 2.2950684999667505e-244, + 6.238642998528377e-244, 1.6958389897142937e-243, 4.6097683097327105e-243, + 1.2530649429752794e-242, 3.406183664368772e-242, 9.258967159247676e-242, + 2.5168482179282025e-241, 6.841502775783763e-241, 1.8597132674765122e-240, + 5.055224781125599e-240, 1.374152566130957e-239, 3.735333950044147e-239, + 1.0153690399631151e-238, 2.760059210511642e-238, 7.502618797404815e-238, + 2.0394232342840765e-237, 5.543727118291579e-237, 1.5069412687587626e-236, + 4.096291067421963e-236, 1.1134873572652229e-235, 3.02677244947294e-235, + 8.227620548282767e-235, 2.236499142785329e-234, 6.079434979197592e-234, + 1.6525617631251106e-233, 4.492128611109229e-233, 1.2210871574679187e-232, + 3.319259031109752e-232, 9.022681508214216e-232, 2.4526191187752155e-231, + 6.666909982697905e-231, 1.8122540257939923e-230, 4.926217186867559e-230, + 1.3390846662104723e-229, 3.640009514928073e-229, 9.894571719847004e-229, + 2.6896234506444874e-228, 7.311154551284224e-228, 1.987377856181155e-227, + 5.402253112739128e-227, 1.4684846469095084e-226, 3.991755131065214e-226, + 1.0850715436432725e-225, 2.9495302596635135e-225, 8.017654507333419e-225, + 2.179424455414719e-224, 5.924289893653081e-224, 1.610388956444074e-223, + 4.377491037053051e-223, 1.189925434026365e-222, 3.234552684535111e-222, + 8.792425785565214e-222, 2.3900291240976666e-221, 6.496772737522576e-221, + 1.7660059276035744e-220, 4.800501821955756e-220, 1.3049116870106872e-219, + 3.547117726544988e-219, 9.642065659472201e-219, 2.6209851870952265e-218, + 7.124576406741286e-218, 1.9366606581912876e-217, 5.264389475052911e-217, + 1.4310094247967382e-216, 3.8898869157786035e-216, 
1.057380891792158e-215, + 2.874259263918443e-215, 7.813046727389575e-215, 2.123806294396449e-214, + 5.773104057224809e-214, 1.5692923852557387e-213, 4.2657789743798256e-213, + 1.1595589470279344e-212, 3.1520080147331386e-212, 8.568046109606362e-212, + 2.329036404514219e-211, 6.3309773362105915e-211, 1.72093806494073e-210, + 4.677994669831859e-210, 1.2716107904632215e-209, 3.4565965045886174e-209, + 9.396003466738291e-209, 2.554098548377289e-208, 6.94275967214761e-208, + 1.8872377456157127e-207, 5.130044069889206e-207, 1.3944905574373912e-206, + 3.790618342239785e-206, 1.0303968958333958e-205, 2.800909158044528e-205, + 7.6136604674769635e-205, 2.069607489679963e-204, 5.6257764312397845e-204, + 1.5292445844012282e-203, 4.1569177650472634e-203, 1.1299674023126563e-202, + 3.071569856457565e-202, 8.349392525651157e-202, 2.2696001981149314e-201, + 6.169412976402867e-201, 1.6770203186015345e-200, 4.55861385801115e-200, + 1.239159721319329e-199, 3.3683853530207066e-199, 9.156220696363793e-199, + 2.4889188336286325e-198, 6.765582837962194e-198, 1.8390760887367006e-197, + 4.999127113166508e-197, 1.3589036389877445e-196, 3.693883068487256e-196, + 1.0041015221521447e-195, 2.7294309215942424e-195, 7.419362476203855e-195, + 2.0167918197815843e-194, 5.482208555497131e-194, 1.4902187896230561e-193, + 4.050834656260586e-193, 1.1011310236205293e-192, 2.993184452260193e-192, + 8.136318905805023e-192, 2.2116807832197573e-191, 6.011971683378335e-191, + 1.6342233380137666e-190, 4.4422796033665057e-190, 1.2075367922765428e-189, + 3.282425319641051e-189, 8.922557099654142e-189, 2.4254024827378097e-188, + 6.592927495525641e-188, 1.7921435007435354e-187, 4.871551112062132e-187, + 1.3242248864327946e-186, 3.5996164455835086e-186, 9.784771973451989e-186, + 2.659776785104989e-185, 7.230022902708112e-185, 1.9653239875774178e-184, + 5.342304482466365e-184, 1.4521889196783625e-183, 3.9474587518512645e-183, + 1.0730305393748917e-182, 2.916799416564376e-182, 7.928682851306888e-182, + 2.1552394518322364e-181, 5.858548237893603e-181, 1.5925185216216938e-180, + 4.328914158808713e-180, 1.1767208694848799e-179, 3.198658956689277e-179, + 8.69485651740623e-179, 2.3635070472324053e-178, 6.424678257926741e-178, + 1.7464086162218176e-177, 4.747230806540073e-177, 1.2904311236918859e-176, + 3.50775547440964e-176, 9.535067964765462e-176, 2.5919001981743924e-175, + 7.04551520987685e-175, 1.9151695967140057e-174, 5.20597071316492e-174, + 1.4151295589086178e-173, 3.84672096489656e-173, 1.0456471698030763e-172, + 2.842363700655332e-172, 7.726345597362994e-172, 2.1002384837706373e-171, + 5.709040105864101e-171, 1.551877997771429e-170, 4.218441761327482e-170, + 1.1466913584229263e-169, 3.1170302824520583e-169, 8.472966775545996e-169, + 2.303191161910391e-168, 6.260722682888491e-168, 1.7018408701917146e-167, + 4.626083112371067e-167, 1.2574997661299533e-166, 3.4182387635625514e-166, + 9.291736316326398e-166, 2.5257557983503035e-165, 6.865716089780698e-165, + 1.8662951286209762e-164, 5.0731161346720364e-164, 1.3790159402541388e-163, + 3.7485539715481897e-163, 1.0189626143857429e-162, 2.7698275585638865e-162, + 7.529171920409294e-162, 2.0466411214592676e-161, 5.5633473698397695e-161, + 1.5122746060840868e-160, 4.110788581358434e-160, 1.1174281901343568e-159, + 3.037484743850101e-159, 8.256739583429307e-159, 2.2444145171954394e-158, + 6.100951197622044e-158, 1.6584104776811452e-157, 4.508027065606742e-157, + 1.2254088054640357e-156, 3.3310064883265936e-156, 9.054614407697357e-156, + 2.4612993808147185e-155, 6.6905053812661495e-155, 
1.818667920110323e-154, + 4.9436519592372975e-154, 1.3438239287020702e-153, 3.652892166039281e-153, + 9.92959039626498e-153, 2.6991425138208544e-152, 7.337030047740496e-152, + 1.994411545363099e-151, 5.421372662229435e-151, 1.473681879304291e-150, + 4.00588267344223e-150, 1.0889118078156954e-149, 2.959969179979893e-149, + 8.046030434738165e-149, 2.1871378321977182e-148, 5.9452570255983664e-148, + 1.616088413820251e-147, 4.3929837684707906e-147, 1.1941367950549688e-146, + 3.2460003506922445e-146, 8.823543768458417e-146, 2.398487868841356e-145, + 6.51976598965092e-145, 1.7722561415473398e-144, 4.8174916649430757e-144, + 1.3095300051567673e-143, 3.5596716168395205e-143, 9.676190671336298e-143, + 2.6302613270598386e-142, 7.149791569445333e-142, 1.9435148500492928e-141, + 5.283021100229299e-141, 1.4360740256119015e-140, 3.9036539281428613e-140, + 1.0611231537463512e-139, 2.88443178658586e-139, 7.840698510906002e-139, + 2.131322828462168e-138, 5.793536115188646e-138, 1.5748463944438505e-137, + 4.280876336630965e-137, 1.163662835574428e-136, 3.163163540395093e-136, + 8.59836997230016e-136, 2.337279285007143e-135, 6.353383808468667e-135, + 1.72702877557863e-134, 4.694550937881265e-134, 1.276111250721801e-133, + 3.4688300239292162e-133, 9.429257620059944e-133, 2.563137964446793e-132, + 6.967331352589224e-132, 1.8939170208596268e-131, 5.148200222412013e-131, + 1.3994259113851392e-130, 3.804034025192962e-130, 1.0340436565521947e-129, + 2.8108220814391768e-129, 7.640606587007545e-129, 2.0769322043867094e-128, + 5.645687070125779e-128, 1.5346568571889095e-127, 4.171629847816681e-127, + 1.1339665610377455e-126, 3.082440696949098e-126, 8.378942533819369e-126, + 2.2776327231383779e-125, 6.191247643210744e-125, 1.6829555964029658e-124, + 4.574747615805637e-124, 1.2435453313830803e-123, 3.380306677163709e-123, + 9.188626215152886e-123, 2.4977275669152505e-122, 6.78952745758695e-122, + 1.8455849111782345e-121, 5.016819926933996e-121, 1.3637130444035917e-120, + 3.706956387834846e-120, 1.0076552187941643e-119, 2.7390908706001e-119, + 7.445620940050319e-119, 2.0239296102932935e-118, 5.501611081740457e-118, + 1.4954929430743994e-117, 4.065171291747877e-117, 1.105028125193164e-116, + 3.003777872648744e-116, 8.16511480794845e-116, 2.219508320972814e-115, + 6.033249137014046e-115, 1.6400071495711497e-114, 4.458001633222172e-114, + 1.2118104830828574e-113, 3.2940424157003086e-113, 8.954135640771486e-113, + 2.433986420186662e-112, 6.616261056709485e-112, 1.7984862202794635e-111, + 4.888792411319657e-111, 1.3289115574798703e-110, 3.6123561383267394e-110, + 9.819402048736065e-110, 2.6691902155412764e-109, 7.255611259606534e-109, + 1.9722796241351285e-108, 5.361211862926555e-108, 1.4573284785512322e-107, + 3.961429521341682e-107, 1.0768281882584307e-106, 2.927122496515368e-106, + 7.9567438919514e-106, 2.1628672335193993e-105, 5.8792826982452694e-105, + 1.598154732301378e-104, 4.344234967880666e-104, 1.1808854971746377e-103, + 3.209979588460643e-103, 8.72562918503701e-103, 2.371871925555801e-102, + 6.4474163546704995e-102, 1.7525894717410477e-101, 4.764032113782328e-101, + 1.2949981925089835e-100, 3.5201700545844787e-100, 9.568814292462674e-100, + 2.601073401110048e-99, 7.070450560725609e-99, 1.921947727823849e-98, + 5.22439558379172e-98, 1.4201379580102718e-97, 3.860335205164256e-97, + 1.0493479039958717e-96, 2.852423339163565e-96, 7.753690529920792e-96, + 2.107671607097867e-95, 5.729245429933205e-95, 1.5573703742969461e-94, + 4.23337158863185e-94, 1.1507497062492758e-93, 3.1280620156019908e-93, + 8.502954135303866e-93, 
2.3113425714217192e-92, 6.282880511239462e-92, + 1.7078639924081707e-91, 4.642455656042647e-91, 1.2619502849247644e-90, + 3.4303365279297016e-90, 9.324621449370601e-90, 2.534694904308355e-89, + 6.89001509906914e-89, 1.8729002841608093e-88, 5.09107080895011e-88, + 1.3838965267367376e-87, 3.761820781096061e-87, 1.0225689071173033e-86, + 2.779630478564191e-86, 7.555819019711961e-86, 2.0538845540408258e-85, + 5.583037061001886e-85, 1.5176268190534823e-84, 4.125337404615185e-84, + 1.1213829703227856e-83, 3.0482349509718567e-83, 8.285961676100547e-83, + 2.252357905545217e-82, 6.122543565829638e-82, 1.664279891894355e-81, + 4.52398178760621e-81, 1.2297457485529627e-80, 3.3427955219162848e-80, + 9.086660323479307e-80, 2.470010363869359e-79, 6.714184288211594e-79, + 1.8251045143570802e-78, 4.961148436415422e-78, 1.3485799642996046e-77, + 3.665820411179563e-77, 9.964733010103672e-77, 2.7086952666810816e-76, + 7.362997122252211e-76, 2.001470128041443e-75, 5.440559879258653e-75, + 1.4788975056432133e-74, 4.020060215743355e-74, 1.0927656633766312e-73, + 2.970445045520691e-73, 8.074506789675094e-73, 2.194878508014299e-72, + 5.96629836401057e-72, 1.6218080426054863e-71, 4.408531331463226e-71, + 1.1983630608508849e-70, 3.257488532207521e-70, 8.854771883513433e-70, + 2.4069765506104637e-69, 6.542840619051457e-69, 1.7785284761271306e-68, + 4.834541638053336e-68, 1.314164668364901e-67, 3.5722699376192174e-67, + 9.710436457780846e-67, 2.6395702969591894e-66, 7.175095973164411e-66, + 1.9503933001302485e-65, 5.301718666092324e-65, 1.4411565509640892e-64, + 3.917469664450395e-64, 1.0648786602415064e-63, 2.8946403116483003e-63, + 7.868448159078602e-63, 2.138865964899539e-62, 5.814040485895939e-62, + 1.580420060273613e-61, 4.2960271311739114e-61, 1.1677812485237086e-60, + 3.1743585474772134e-60, 8.628801156620959e-60, 2.3455513385429143e-59, + 6.375869581278994e-59, 1.733141042341547e-58, 4.7111658015535965e-58, + 1.2806276389220833e-57, 3.4811068399043105e-57, 9.462629465836378e-57, + 2.572209372642415e-56, 6.991989996645917e-56, 1.9006199352650016e-55, + 5.166420632837861e-55, 1.4043787324419038e-54, 3.817497188671175e-54, + 1.0377033238158346e-53, 2.820770088460135e-53, 7.667648073722e-53, + 2.0842828425817514e-52, 5.665668176358939e-52, 1.5400882849875202e-51, + 4.1863939993042314e-51, 1.1379798735078682e-50, 3.093350011308561e-50, + 8.408597124803643e-50, 2.2856936767186716e-49, 6.213159586848109e-49, + 1.6889118802245324e-48, 4.590938473882946e-48, 1.2479464629129513e-47, + 3.392270193026015e-47, 9.221146422925876e-47, 2.506567475899953e-46, + 6.813556821545298e-46, 1.8521167695179754e-45, 5.0345753587649823e-45, + 1.368539471173853e-44, 3.720075976020836e-44, 1.0112214926104486e-43, + 2.7487850079102147e-43, 7.47197233734299e-43, 2.031092662734811e-42, + 5.5210822770285325e-42, 1.5007857627073948e-41, 4.0795586671775603e-41, + 1.1089390193121365e-40, 3.0144087850653746e-40, 8.194012623990515e-40, + 2.2273635617957438e-39, 6.054601895401186e-39, 1.6458114310822737e-38, + 4.4737793061811207e-38, 1.2160992992528256e-37, 3.3057006267607343e-37, + 8.985825944049381e-37, 2.4426007377405277e-36, 6.639677199580735e-36, + 1.8048513878454153e-35, 4.906094730649281e-35, 1.3336148155022614e-34, + 3.6251409191435593e-34, 9.854154686111257e-34, 2.6786369618080778e-33, + 7.281290178321643e-33, 1.9792598779469045e-32, 5.380186160021138e-32, + 1.462486227251231e-31, 3.975449735908647e-31, 1.0806392777072785e-30, + 2.9374821117108028e-30, 7.984904245686979e-30, 2.1705220113036395e-29, + 5.900090541597061e-29, 
1.603810890548638e-28, 4.359610000063081e-28, + 1.185064864233981e-27, 3.221340285992516e-27, 8.75651076269652e-27, + 2.3802664086944007e-26, 6.47023492564546e-26, 1.7587922024243116e-25, + 4.780892883885469e-25, 1.2995814250075031e-24, 3.532628572200807e-24, + 9.602680054508676e-24, 2.6102790696677047e-23, 7.095474162284704e-23, + 1.9287498479639178e-22, 5.242885663363464e-22, 1.4251640827409352e-21, + 3.873997628687187e-21, 1.0530617357553812e-20, 2.8625185805493937e-20, + 7.781132241133797e-20, 2.1151310375910805e-19, 5.74952226429356e-19, + 1.5628821893349888e-18, 4.248354255291589e-18, 1.1548224173015786e-17, + 3.1391327920480296e-17, 8.533047625744066e-17, 2.3195228302435696e-16, + 6.305116760146989e-16, 1.713908431542013e-15, 4.658886145103398e-15, + 1.2664165549094176e-14, 3.442477108469977e-14, 9.357622968840175e-14, + 2.543665647376923e-13, 6.914400106940203e-13, 1.8795288165390832e-12, + 5.109089028063325e-12, 1.3887943864964021e-11, 3.775134544279098e-11, + 1.026187963170189e-10, 2.7894680928689246e-10, 7.582560427911907e-10, + 2.061153622438558e-09, 5.602796437537268e-09, 1.522997974471263e-08, + 4.139937718785167e-08, 1.1253517471925912e-07, 3.059023205018258e-07, + 8.315287191035679e-07, 2.2603294069810542e-06, 6.14421235332821e-06, + 1.670170079024566e-05, 4.5399929762484854e-05, 0.00012340980408667956, + 0.00033546262790251185, 0.0009118819655545162, 0.0024787521766663585, + 0.006737946999085467, 0.01831563888873418, 0.049787068367863944, + 0.1353352832366127, 0.36787944117144233, 1.0, + 2.718281828459045, 7.38905609893065, 20.085536923187668, + 54.598150033144236, 148.4131591025766, 403.4287934927351, + 1096.6331584284585, 2980.9579870417283, 8103.083927575384, + 22026.465794806718, 59874.14171519782, 162754.79141900392, + 442413.3920089205, 1202604.2841647768, 3269017.3724721107, + 8886110.520507872, 24154952.7535753, 65659969.13733051, + 178482300.96318725, 485165195.4097903, 1318815734.4832146, + 3584912846.131592, 9744803446.248903, 26489122129.84347, + 72004899337.38588, 195729609428.83878, 532048240601.79865, + 1446257064291.475, 3931334297144.042, 10686474581524.463, + 29048849665247.426, 78962960182680.69, 214643579785916.06, + 583461742527454.9, 1586013452313430.8, 4311231547115195.0, + 1.1719142372802612e+16, 3.1855931757113756e+16, 8.659340042399374e+16, + 2.3538526683702e+17, 6.398434935300549e+17, 1.739274941520501e+18, + 4.727839468229346e+18, 1.2851600114359308e+19, 3.4934271057485095e+19, + 9.496119420602448e+19, 2.5813128861900675e+20, 7.016735912097631e+20, + 1.9073465724950998e+21, 5.184705528587072e+21, 1.4093490824269389e+22, + 3.831008000716577e+22, 1.0413759433029089e+23, 2.830753303274694e+23, + 7.694785265142018e+23, 2.091659496012996e+24, 5.685719999335932e+24, + 1.545538935590104e+25, 4.2012104037905144e+25, 1.1420073898156842e+26, + 3.10429793570192e+26, 8.438356668741454e+26, 2.29378315946961e+27, + 6.235149080811617e+27, 1.6948892444103338e+28, 4.607186634331292e+28, + 1.2523631708422137e+29, 3.404276049931741e+29, 9.253781725587787e+29, + 2.515438670919167e+30, 6.837671229762744e+30, 1.8586717452841279e+31, + 5.052393630276104e+31, 1.3733829795401761e+32, 3.7332419967990015e+32, + 1.0148003881138887e+33, 2.7585134545231703e+33, 7.498416996990121e+33, + 2.0382810665126688e+34, 5.54062238439351e+34, 1.5060973145850306e+35, + 4.0939969621274545e+35, 1.1128637547917594e+36, 3.0250773222011426e+36, + 8.223012714622913e+36, 2.235246603734715e+37, 6.076030225056872e+37, + 1.6516362549940018e+38, 4.4896128191743455e+38, 1.2204032943178408e+39, + 
3.317400098335743e+39, 9.017628405034299e+39, 2.451245542920086e+40, + 6.663176216410896e+40, 1.8112390828890233e+41, 4.923458286012058e+41, + 1.3383347192042695e+42, 3.637970947608805e+42, 9.889030319346946e+42, + 2.6881171418161356e+43, 7.307059979368067e+43, 1.9862648361376543e+44, + 5.399227610580169e+44, 1.4676622301554424e+45, 3.989519570547216e+45, + 1.0844638552900231e+46, 2.947878391455509e+46, 8.013164264000591e+46, + 2.1782038807290206e+47, 5.92097202766467e+47, 1.609487066961518e+48, + 4.375039447261341e+48, 1.189259022828201e+49, 3.2327411910848595e+49, + 8.787501635837023e+49, 2.3886906014249913e+50, 6.493134255664462e+50, + 1.7650168856917655e+51, 4.797813327299302e+51, 1.3041808783936323e+52, + 3.5451311827611664e+52, 9.636665673603202e+52, 2.6195173187490626e+53, + 7.120586326889338e+53, 1.9355760420357226e+54, 5.261441182666386e+54, + 1.4302079958348105e+55, 3.887708405994595e+55, 1.0567887114362587e+56, + 2.872649550817832e+56, 7.808671073519151e+56, 2.1226168683560893e+57, + 5.769870862033003e+57, 1.568413511681964e+58, 4.263389948314721e+58, + 1.1589095424138854e+59, 3.150242749971452e+59, 8.56324762248225e+59, + 2.3277320404788622e+60, 6.327431707155585e+60, 1.7199742630376623e+61, + 4.675374784632515e+61, 1.2708986318302189e+62, 3.454660656717546e+62, + 9.390741286647697e+62, 2.5526681395254553e+63, 6.938871417758404e+63, + 1.886180808490652e+64, 5.12717101690833e+64, 1.3937095806663797e+65, + 3.788495427274696e+65, 1.0298198277160991e+66, 2.799340524267497e+66, + 7.609396478785354e+66, 2.0684484173822473e+67, 5.622625746075033e+67, + 1.5283881393781746e+68, 4.154589706104022e+68, 1.129334570280557e+69, + 3.0698496406442424e+69, 8.344716494264775e+69, 2.2683291210002403e+70, + 6.165957830579433e+70, 1.6760811125908828e+71, 4.556060831379215e+71, + 1.2384657367292132e+72, 3.366498907320164e+72, 9.151092805295634e+72, + 2.487524928317743e+73, 6.761793810485009e+73, 1.8380461242828246e+74, + 4.996327379507578e+74, 1.358142592474785e+75, 3.691814329580466e+75, + 1.0035391806143295e+76, 2.7279023188106115e+76, 7.415207303034179e+76, + 2.0156623266094611e+77, 5.47913827473198e+77, 1.4893842007818383e+78, + 4.048566008579269e+78, 1.1005143412437996e+79, 2.991508135761597e+79, + 8.131762205128143e+79, 2.2104421435549888e+80, 6.008604711685586e+80, + 1.633308100216833e+81, 4.4397917290943824e+81, 1.2068605179340022e+82, + 3.2805870153846705e+82, 8.917560070598843e+82, 2.4240441494100796e+83, + 6.589235162723882e+83, 1.7911398206275708e+84, 4.8688228266413195e+84, + 1.3234832615645704e+85, 3.5976005001806814e+85, 9.779292065696318e+85, + 2.658287191737602e+86, 7.225973768125749e+86, 1.964223318681796e+87, + 5.339312554208246e+87, 1.4513756292567526e+88, 3.945247999276943e+88, + 1.0724295945198918e+89, 2.9151658790851237e+89, 7.924242436060931e+89, + 2.1540324218248465e+90, 5.855267190158109e+90, 1.5916266403779241e+91, + 4.326489774230631e+91, 1.1760618534305e+92, 3.196867565323994e+92, + 8.689987010810322e+92, 2.3621833781030834e+93, 6.421080152185614e+93, + 1.7454305496765193e+94, 4.744572146022966e+94, 1.2897084248347162e+95, + 3.505790975238748e+95, 9.529727902367202e+95, 2.5904486187163903e+96, + 7.041569407813597e+96, 1.9140970165092822e+97, 5.2030551378848545e+97, + 1.4143370233782872e+98, 3.844566629966054e+98, 1.0450615608536755e+99, + 2.840771850489593e+99, 7.722018499983836e+99, 2.0990622567530634e+100, + 5.705842789336087e+100, 1.551008877029636e+101, 4.216079246208329e+101, + 1.146049160231141e+102, 3.115284606777059e+102, 8.468221537080262e+102, + 
2.30190127236108e+103, 6.25721639956588e+103, 1.700887763567586e+104, + 4.6234922999541146e+104, 1.2567955102985587e+105, 3.416324397733485e+105, + 9.286532530480224e+105, 2.5243412626998188e+106, 6.861870983226278e+106, + 1.8652499202934394e+107, 5.070274963868339e+107, 1.3782436299574147e+108, + 3.7464546145026734e+108, 1.0183919499749154e+109, 2.7682763318657856e+109, + 7.524955249064026e+109, 2.045494911349825e+110, 5.5602316477276757e+110, + 1.5114276650041035e+111, 4.10848635681094e+111, 1.1168023806191083e+112, + 3.0357836172167243e+112, 8.25211544181389e+112, 2.2431575451828986e+113, + 6.0975343934414735e+113, 1.6574816940096004e+114, 4.505502369829812e+114, + 1.2247225219987542e+115, 3.329140976453747e+115, 9.049543420672623e+115, + 2.45992094362655e+116, 6.686758400505878e+116, 1.8176493851391e+117, + 4.940883294133372e+117, 1.3430713274979614e+118, 3.6508463838620755e+118, + 9.924029383747696e+118, 2.6976308738934977e+119, 7.33292098439479e+119, + 1.993294586140637e+120, 5.418336452271886e+120, 1.472856551868792e+121, + 4.0036392008717847e+121, 1.0883019687436065e+122, 2.9583114655119493e+122, + 8.04152429962318e+122, 2.185912937677754e+123, 5.941927417082968e+123, + 1.6151833323879222e+124, 4.390523502060015e+124, 1.1934680253072109e+125, + 3.2441824460394912e+125, 8.818602191274965e+125, 2.3971446088951858e+126, + 6.516114630548348e+126, 1.77126359923757e+127, 4.814793655218451e+127, + 1.3087966100760222e+128, 3.55767804231845e+128, 9.670771573941992e+128, + 2.6287882636624796e+129, 7.145787367980123e+129, 1.9424263952412558e+130, + 5.280062373303513e+130, 1.435269760248128e+131, 3.901467708219257e+131, + 1.0605288775572162e+132, 2.882816376419849e+132, 7.836307370806225e+132, + 2.130129192828224e+133, 5.790291477135095e+133, 1.5739644103777611e+134, + 4.278478855371123e+134, 1.1630111326001581e+135, 3.161392028042583e+135, + 8.593554502463442e+135, 2.3359703045918785e+136, 6.349825630792043e+136, + 1.7260615626065507e+137, 4.691921780435012e+137, 1.2753965716307703e+138, + 3.4668873247428877e+138, 9.423976816163585e+138, 2.56170249311968e+139, + 6.963429336965459e+139, 1.8928563430431824e+140, 5.145317001177723e+140, + 1.3986421705962793e+141, 3.801903596848382e+141, 1.0334645460866042e+142, + 2.8092478959838913e+142, 7.636327507289818e+142, 2.075769029922787e+143, + 5.642525234117172e+143, 1.533797381052233e+144, 4.169293549452358e+144, + 1.133331489298786e+145, 3.080714392981317e+145, 8.374249953113352e+145, + 2.2763571474522036e+146, 6.187780269002192e+146, 1.682013066372608e+147, + 4.572185553551339e+147, 1.2428488906561565e+148, 3.378413554991113e+148, + 9.183480175552067e+148, 2.4963287283217065e+149, 6.785725020057171e+149, + 1.8445513014941297e+150, 5.014010284511975e+150, 1.362949304409567e+151, + 3.7048803272874213e+151, 1.0070908870280797e+152, 2.7375568578151306e+152, + 7.441451060972311e+152, 2.0227961196408315e+153, 5.498529934697141e+153, + 1.4946554004725342e+154, 4.062894614912666e+154, 1.1044092602661211e+155, + 3.0020956233632933e+155, 8.16054198028487e+155, 2.2182652975385555e+156, + 6.0298702490003525e+156, 1.6390886725823477e+157, 4.4555049539136534e+157, + 1.211131815283274e+158, 3.2921976053531405e+158, 8.949120926327824e+158, + 2.4326232794719504e+159, 6.612555656075053e+159, 1.7974789879582895e+160, + 4.8860544700039736e+160, 1.3281673078672893e+161, 3.6103330581290227e+161, + 9.813902746597095e+161, 2.66769535023392e+162, 7.251547794405553e+162, + 1.9711750597734883e+163, 5.358209345693946e+163, 1.4565123097479284e+164, + 3.959210944514706e+164, 
1.0762251165510499e+165, 2.9254831776519365e+165, + 7.952287761273885e+165, 2.161655931614806e+166, 5.875990038289236e+166, + 1.5972596945288e+167, 4.3418020029676826e+167, 1.1802241487434137e+168, + 3.2081818570377667e+168, 8.720742444377757e+168, 2.370543571722357e+169, + 6.443805514583285e+169, 1.7516079436415928e+170, 4.7613640437854577e+170, + 1.2942729358900287e+171, 3.518198602696204e+171, 9.563455330619095e+171, + 2.5996166842501676e+172, 7.066490793756186e+172, 1.9208713515640576e+173, + 5.221469689764144e+173, 1.419342617553556e+174, 3.858173245653328e+174, + 1.0487602224706297e+175, 2.8508258551525784e+175, 7.749348118162471e+175, + 2.1064912172004347e+176, 5.726036797524517e+176, 1.556498177579872e+177, + 4.231000712144986e+177, 1.1501052352020995e+178, 3.1263101616654833e+178, + 8.498192102582143e+178, 2.3100481167203208e+179, 6.279361818546888e+179, + 1.7069075125675549e+180, 4.639855674272614e+180, 1.2612435366047836e+181, + 3.428415386814204e+181, 9.31939924638644e+181, 2.5332753623607178e+182, + 6.886156383988143e+182, 1.8718513766522217e+183, 5.088219582729782e+183, + 1.3831214830943832e+184, 3.759713994046786e+184, 1.0219962230220558e+185, + 2.778073761794632e+185, 7.551587424805211e+185, 2.052734287286784e+186, + 5.579910311786494e+186, 1.5167768804960472e+187, 4.123027032079202e+187, + 1.1207549459546325e+188, 3.046527803744077e+188, 8.281321168812768e+188, + 2.2510964848816967e+189, 6.119114668961948e+189, 1.663347821089645e+190, + 4.521448156474929e+190, 1.229057036206545e+191, 3.340923407659982e+191, + 9.0815713893156e+191, 2.4686270481430163e+192, 6.710424046209653e+192, + 1.8240823746066321e+193, 4.958369972505633e+193, 1.3478246995039038e+194, + 3.663767388609735e+194, 9.959152316158692e+194, 2.7071782767869983e+195, + 7.35887351618917e+195, 2.000349215698554e+196, 5.437512923605682e+196, + 1.4780692572248542e+197, 4.017808803118279e+197, 1.0921536659739205e+198, + 2.968781464101838e+198, 8.069984706534065e+198, 2.193649278371395e+199, + 5.962956971409261e+199, 1.6208997579264978e+200, 4.4060623577252635e+200, + 1.1976919242062002e+201, 3.255664193661862e+201, 8.849812817195809e+201, + 2.405628536624732e+202, 6.539176337129533e+202, 1.777532421030859e+203, + 4.831834079584997e+203, 1.3134286776665033e+204, 3.5702693074778485e+204, + 9.704998181222095e+204, 2.6380920201244107e+205, 7.1710776001069995e+205, + 1.9493009930840557e+206, 5.298749467697559e+206, 1.4403494391599313e+207, + 3.9152757070996186e+207, 1.0642822808016033e+208, 2.8930191842539453e+208, + 7.86404147794091e+208, 2.137668104773499e+209, 5.810784364482288e+209, + 1.5795349547066147e+210, 4.2936211647948715e+210, 1.167127239054906e+211, + 3.172580765422527e+211, 8.623968643966744e+211, 2.3442377254095393e+212, + 6.372298810568915e+212, 1.732170406228067e+213, 4.708527339044277e+213, + 1.279910430452668e+214, 3.4791572651546824e+214, 9.457329972221242e+214, + 2.5707688209230085e+215, 6.9880741710841e+215, 1.8995555035181914e+216, + 5.1635272073628715e+216, 1.4035922178528375e+217, 3.8153592203558975e+217, + 1.0371221637737106e+218, 2.8191903316782035e+218, 7.663353849568289e+218, + 2.0831155514333153e+219, 5.662495150041624e+219, 1.5392257670095623e+220, + 4.184049432358029e+220, 1.1373425541353215e+221, 3.091617597639242e+221, + 8.403887936206959e+221, 2.2844135865397565e+222, 6.209679940975975e+222, + 1.687966014410163e+223, 4.588367344027585e+223, 1.2472475573565076e+224, + 3.3903703707521256e+224, 9.215982170561459e+224, 2.505163686563976e+225, + 6.809740926502327e+225, 1.851079501702514e+226, 
5.031755772510968e+226, + 1.367773028166047e+227, 3.7179925679201674e+227, 1.0106551635723174e+228, + 2.7472455659769343e+228, 7.467787700309786e+228, 2.0299551604542052e+229, + 5.517990225249331e+229, 1.4999452558909891e+230, 4.077273932771829e+230, + 1.1083179641103409e+231, 3.0127205819958637e+231, 8.189423612263916e+231, + 2.2261161390770435e+232, 6.051211048892536e+232, 1.644889703437518e+233, + 4.471273790673593e+233, 1.2154182295253221e+234, 3.3038492872965484e+234, + 8.980793481625574e+234, 2.441232772624624e+235, 6.635958684864208e+235, + 1.803840590747136e+236, 4.903347099264769e+236, 1.3328679318558793e+237, + 3.6231106788996255e+237, 9.848635920948766e+237, 2.6771368059024047e+238, + 7.277212331783397e+238, 1.9781514043324884e+239, 5.377173016337745e+239, + 1.4616671698791204e+240, 3.9732233071375736e+240, 1.0800340716202018e+241, + 2.935836991001829e+241, 7.980432343958154e+241, 2.1693064223828275e+242, + 5.896786228322743e+242, 1.6029126850757262e+243, 4.357168424447843e+243, + 1.1844011751712099e+244, 3.219536192073438e+244, 8.751606726979457e+244, + 2.37893335357682e+245, 6.4666113061430065e+245, 1.757807200519635e+246, + 4.778215371106989e+246, 1.298853601574382e+247, 3.5306501429882274e+247, + 9.597302126331227e+247, 2.608817197223753e+248, 7.091500380984786e+248, + 1.9276696622141338e+249, 5.239949414068466e+249, 1.4243659274306933e+250, + 3.871828017611069e+250, 1.0524719743190776e+251, 2.860915442753964e+251, + 7.776774460795963e+251, 2.1139464700806057e+252, 5.746302275955253e+252, + 1.5620069057562017e+253, 4.245974987844624e+253, 1.1541756653549656e+254, + 3.137374737984031e+254, 8.52826873932845e+254, 2.3182237942331857e+255, + 6.301585614165449e+255, 1.7129485665464872e+256, 4.656276961528286e+256, + 1.2657073052794837e+257, 3.440549168089086e+257, 9.352382283536447e+257, + 2.5422410814139436e+258, 6.910527735169595e+258, 1.878476196757375e+259, + 5.106227710838431e+259, 1.3880165998346134e+260, 3.7730203009299397e+260, + 1.0256132522424933e+261, 2.787905866597553e+261, 7.578313856626495e+261, + 2.059999284682719e+262, 5.599658622191666e+262, 1.522145027827762e+263, + 4.1376191694234934e+263, 1.124721500132769e+264, 3.0573100158881035e+264, + 8.310630260154467e+264, 2.2590635219219752e+265, 6.140771320975197e+265, + 1.6692347094529326e+266, 4.537450378139021e+266, 1.2334068910429924e+267, + 3.352747539018332e+267, 9.113712710724316e+267, 2.4773639651358133e+268, + 6.734173448907929e+268, 1.83053813158578e+269, 4.975918539390998e+269, + 1.3525948945519025e+270, 3.676734123126915e+270, 9.994399554971195e+270, + 2.7167594696637367e+271, 7.384917898680968e+271, 2.007428812864643e+272, + 5.456757263935073e+272, 1.4833004112866607e+273, 4.032028554146358e+273, + 1.0960189950564043e+274, 2.9792885179077677e+274, 8.098545839965366e+274, + 2.201412999372045e+275, 5.984060953126553e+275, 1.6266364149275224e+276, + 4.421656208207252e+276, 1.2019307722462898e+277, 3.2671865772628366e+277, + 8.881133903158874e+277, 2.414142490506832e+278, 6.562319663255584e+278, + 1.7838234293167135e+279, 4.848934813091121e+279, 1.318077138980805e+280, + 3.58290513539881e+280, 9.73934592264718e+280, 2.6474287042608523e+281, + 7.19645733893315e+281, 1.956199921370272e+282, 5.317502699093823e+282, + 1.4454470959728667e+283, 3.9291325749819406e+283, 1.0680489680179907e+284, + 2.9032581016677402e+284, 7.891873741089921e+284, 2.1452336982897837e+285, + 5.831349779859113e+285, 1.585125214197968e+286, 4.308817065586588e+286, + 1.1712579131538248e+287, 3.1838091017649045e+287, 8.654490426610056e+287, 
+ 2.3525344061226884e+288, 6.394851526987996e+288, 1.7383008701505047e+289, + 4.725191667724663e+289, 1.2844402646362043e+290, 3.491470631101721e+290, + 9.490801171122244e+290, 2.579867236097942e+291, 7.012806227721897e+291, + 1.9062783735320858e+292, 5.181801862756733e+292, 1.4085597842206858e+293, + 3.828862465745284e+293, 1.04079272643043e+294, 2.829167955448184e+294, + 7.690475842953428e+294, 2.090488073610356e+295, 5.682535743105387e+295, + 1.5446733650052388e+296, 4.198857538998427e+296, 1.1413678148547691e+297, + 3.1025593907077266e+297, 8.433630813475781e+297, 2.2924985388203488e+298, + 6.231657119844268e+298, 1.6939400310060103e+299, 4.60460640478299e+299, + 1.2516617917327736e+300, 3.4023695038436884e+300, 9.248599196001516e+300, + 2.5140299133191857e+301, 6.833841829578011e+301, 1.8576308063905224e+302, + 5.049564064997079e+302, 1.372613823952135e+303, 3.7311512151407716e+303, + 1.0142320547350045e+304, 2.7569685642268427e+304, 7.49421754977065e+304, + 2.037139538406043e+305, 5.5375193892845935e+305, 1.505253833063194e+306, + 4.0917041416340054e+306, 1.1122405015634333e+307, 3.023383144276055e+307, + 8.218407461554972e+307 +}; + +/*unused +// [exp(i/256.) for i in range(-256, 256)] +static double EXP_TABLE_r256[512] = { 0.36787944117144233, + 0.3693192805940405, + 0.3707647553888037, + 0.372215887611955, + 0.373672699406043, + 0.37513521300027985, + 0.3766034507108804, + 0.3780774349414026, + 0.3795571881830896, + 0.3810427330152126, + 0.3825340921054156, + 0.3840312882100615, + 0.3855343441745787, + 0.3870432829338104, + 0.3885581275123641, + 0.39007890102496307, + 0.391605626676799, + 0.39313832776388624, + 0.3946770276734171, + 0.3962217498841188, + 0.39777251796661167, + 0.39932935558376886, + 0.4008922864910774, + 0.4024613345370006, + 0.4040365236633421, + 0.40561787790561105, + 0.407205421393389, + 0.4087991783506979, + 0.41039917309637, + 0.41200543004441853, + 0.41361797370441067, + 0.4152368286818413, + 0.4168620196785084, + 0.4184935714928901, + 0.42013150902052315, + 0.4217758572543825, + 0.4234266412852628, + 0.42508388630216154, + 0.4267476175926629, + 0.42841786054332404, + 0.43009464064006225, + 0.43177798346854385, + 0.4334679147145746, + 0.4351644601644917, + 0.4368676457055573, + 0.43857749732635326, + 0.440294041117178, + 0.44201730327044453, + 0.44374731008107987, + 0.4454840879469266, + 0.4472276633691456, + 0.4489780629526202, + 0.4507353134063624, + 0.4524994415439203, + 0.4542704742837873, + 0.4560484386498127, + 0.45783336177161427, + 0.459625270884992, + 0.4614241933323439, + 0.463230156563083, + 0.4650431881340563, + 0.46686331570996537, + 0.4686905670637882, + 0.47052497007720323, + 0.4723665527410147, + 0.4742153431555798, + 0.47607136953123724, + 0.47793466018873804, + 0.4798052435596775, + 0.48168314818692903, + 0.4835684027250795, + 0.485461035940867, + 0.4873610767136191, + 0.48926855403569414, + 0.4911834970129232, + 0.49310593486505433, + 0.4950358969261986, + 0.49697341264527733, + 0.49891851158647194, + 0.5008712234296745, + 0.5028315779709409, + 0.5047996051229459, + 0.5067753349154387, + 0.5087587974957017, + 0.5107500231290107, + 0.5127490421990961, + 0.514755885208607, + 0.5167705827795767, + 0.5187931656538893, + 0.5208236646937497, + 0.5228621108821537, + 0.5249085353233612, + 0.5269629692433709, + 0.5290254439903966, + 0.5310959910353452, + 0.5331746419722976, + 0.5352614285189903, + 0.5373563825172994, + 0.5394595359337269, + 0.5415709208598878, + 0.5436905695130004, + 0.5458185142363775, + 0.54795478749992, + 0.5500994219006123, + 
0.5522524501630204, + 0.5544139051397897, + 0.556583819812148, + 0.5587622272904076, + 0.5609491608144708, + 0.5631446537543375, + 0.5653487396106142, + 0.5675614520150244, + 0.569782824730923, + 0.572012891653811, + 0.5742516868118521, + 0.5764992443663932, + 0.5787555986124843, + 0.5810207839794026, + 0.583294835031178, + 0.5855777864671197, + 0.5878696731223465, + 0.5901705299683179, + 0.5924803921133679, + 0.5947992948032403, + 0.5971272734216274, + 0.5994643634907089, + 0.6018106006716945, + 0.604166020765368, + 0.6065306597126334, + 0.6089045535950636, + 0.6112877386354506, + 0.6136802511983586, + 0.6160821277906783, + 0.6184934050621846, + 0.6209141198060958, + 0.6233443089596343, + 0.6257840096045911, + 0.628233258967891, + 0.6306920944221607, + 0.6331605534862997, + 0.635638673826052, + 0.6381264932545812, + 0.6406240497330474, + 0.6431313813711866, + 0.645648526427892, + 0.648175523311798, + 0.6507124105818659, + 0.6532592269479727, + 0.6558160112715016, + 0.6583828025659347, + 0.6609596399974489, + 0.663546562885513, + 0.6661436107034878, + 0.6687508230792285, + 0.6713682397956895, + 0.673995900791531, + 0.676633846161729, + 0.6792821161581865, + 0.6819407511903481, + 0.6846097918258168, + 0.6872892787909722, + 0.6899792529715928, + 0.6926797554134794, + 0.6953908273230813, + 0.6981125100681258, + 0.7008448451782485, + 0.7035878743456275, + 0.7063416394256196, + 0.7091061824373984, + 0.7118815455645965, + 0.7146677711559482, + 0.717464901725936, + 0.7202729799554398, + 0.7230920486923872, + 0.7259221509524082, + 0.7287633299194912, + 0.7316156289466418, + 0.7344790915565446, + 0.7373537614422269, + 0.7402396824677261, + 0.7431368986687583, + 0.7460454542533906, + 0.7489653936027156, + 0.7518967612715286, + 0.7548396019890073, + 0.7577939606593946, + 0.7607598823626837, + 0.7637374123553055, + 0.76672659607082, + 0.7697274791206092, + 0.7727401072945725, + 0.7757645265618263, + 0.7788007830714049, + 0.7818489231529648, + 0.7849089933174918, + 0.7879810402580102, + 0.791065110850296, + 0.7941612521535917, + 0.7972695114113244, + 0.8003899360518268, + 0.8035225736890608, + 0.806667472123344, + 0.8098246793420792, + 0.812994243520487, + 0.8161762130223398, + 0.8193706364007008, + 0.8225775623986646, + 0.8257970399501007, + 0.8290291181804004, + 0.8322738464072263, + 0.835531274141265, + 0.8388014510869826, + 0.8420844271433824, + 0.8453802524047673, + 0.8486889771615039, + 0.8520106519007895, + 0.8553453273074225, + 0.8586930542645764, + 0.8620538838545757, + 0.8654278673596753, + 0.8688150562628432, + 0.8722155022485462, + 0.8756292572035382, + 0.8790563732176524, + 0.8824969025845955, + 0.885950897802746, + 0.8894184115759556, + 0.8928994968143528, + 0.8963942066351505, + 0.8999025943634562, + 0.9034247135330867, + 0.9069606178873836, + 0.9105103613800342, + 0.9140739981758944, + 0.9176515826518158, + 0.9212431693974745, + 0.9248488132162048, + 0.9284685691258352, + 0.9321024923595276, + 0.9357506383666208, + 0.9394130628134758, + 0.9430898215843259, + 0.9467809707821289, + 0.9504865667294234, + 0.9542066659691884, + 0.9579413252657053, + 0.9616906016054253, + 0.9654545521978378, + 0.9692332344763441, + 0.9730267060991332, + 0.976835024950062, + 0.9806582491395386, + 0.9844964370054085, + 0.9883496471138451, + 0.9922179382602435, + 0.9961013694701175, + 1.0, + 1.0039138893383475, + 1.007843097206448, + 1.0117876835593316, + 1.0157477085866857, + 1.0197232327137742, + 1.023714316602358, + 1.0277210211516217, + 1.0317434074991028, + 1.035781537021624, + 1.03983547133623, + 
1.0439052723011284, + 1.0479910020166328, + 1.0520927228261099, + 1.056210497316932, + 1.0603443883214314, + 1.0644944589178593, + 1.0686607724313482, + 1.0728433924348775, + 1.0770423827502449, + 1.0812578074490395, + 1.0854897308536195, + 1.0897382175380932, + 1.0940033323293055, + 1.0982851403078258, + 1.1025837068089421, + 1.1068990974236574, + 1.1112313779996905, + 1.1155806146424807, + 1.1199468737161973, + 1.1243302218447506, + 1.1287307259128108, + 1.1331484530668263, + 1.1375834707160497, + 1.1420358465335656, + 1.1465056484573242, + 1.1509929446911764, + 1.1554978037059165, + 1.160020294240325, + 1.1645604853022191, + 1.1691184461695043, + 1.1736942463912328, + 1.1782879557886632, + 1.1828996444563278, + 1.1875293827631006, + 1.1921772413532716, + 1.1968432911476248, + 1.2015276033445204, + 1.2062302494209807, + 1.2109513011337818, + 1.2156908305205474, + 1.2204489099008486, + 1.2252256118773075, + 1.2300210093367046, + 1.234835175451091, + 1.2396681836789056, + 1.2445201077660952, + 1.2493910217472401, + 1.2542809999466837, + 1.259190116979667, + 1.2641184477534664, + 1.2690660674685372, + 1.274033051619661, + 1.2790194759970976, + 1.2840254166877414, + 1.289050950076283, + 1.2940961528463732, + 1.2991611019817952, + 1.3042458747676378, + 1.3093505487914747, + 1.3144752019445491, + 1.3196199124229622, + 1.3247847587288655, + 1.3299698196716598, + 1.3351751743691969, + 1.3404009022489867, + 1.3456470830494105, + 1.3509137968209362, + 1.3562011239273402, + 1.3615091450469345, + 1.3668379411737963, + 1.3721875936190053, + 1.3775581840118836, + 1.3829497943012412, + 1.3883625067566268, + 1.3937964039695832, + 1.3992515688549068, + 1.4047280846519141, + 1.4102260349257107, + 1.4157455035684665, + 1.4212865748006966, + 1.4268493331725458, + 1.4324338635650782, + 1.4380402511915735, + 1.4436685815988268, + 1.449318940668454, + 1.4549914146182013, + 1.4606860900032628, + 1.4664030537175992, + 1.4721423929952648, + 1.4779041954117385, + 1.4836885488852596, + 1.4894955416781699, + 1.4953252623982605, + 1.5011778000001228, + 1.5070532437865074, + 1.5129516834096854, + 1.5188732088728176, + 1.5248179105313266, + 1.5307858790942768, + 1.5367772056257567, + 1.542791981546271, + 1.5488302986341331, + 1.5548922490268668, + 1.5609779252226124, + 1.5670874200815372, + 1.573220826827253, + 1.5793782390482385, + 1.5855597506992676, + 1.5917654561028425, + 1.5979954499506333, + 1.604249827304923, + 1.6105286836000576, + 1.6168321146439029, + 1.6231602166193055, + 1.6295130860855618, + 1.63589081997989, + 1.64229351561891, + 1.6487212707001282, + 1.6551741833034281, + 1.6616523518925677, + 1.66815587531668, + 1.674684852811784, + 1.6812393840022961, + 1.6878195689025528, + 1.694425507918335, + 1.7010573018484008, + 1.7077150518860233, + 1.7143988596205357, + 1.72110882703888, + 1.7278450565271632, + 1.7346076508722215, + 1.7413967132631865, + 1.7482123472930606, + 1.7550546569602985, + 1.7619237466703928, + 1.7688197212374674, + 1.7757426858858776, + 1.782692746251815, + 1.7896700083849195, + 1.7966745787498977, + 1.8037065642281471, + 1.8107660721193872, + 1.8178532101432967, + 1.8249680864411575, + 1.832110809577504, + 1.8392814885417808, + 1.8464802327500045, + 1.8537071520464343, + 1.8609623567052476, + 1.8682459574322223, + 1.8755580653664274, + 1.882898792081917, + 1.8902682495894338, + 1.8976665503381187, + 1.9050938072172259, + 1.9125501335578454, + 1.9200356431346326, + 1.9275504501675447, + 1.9350946693235824, + 1.9426684157185412, + 1.9502718049187666, + 1.957904952942918, + 
1.9655679762637392, + 1.9732609918098354, + 1.980984116967457, + 1.988737469582292, + 1.9965211679612622, + 2.0043353308743312, + 2.0121800775563137, + 2.0200555277086965, + 2.0279618015014655, + 2.0358990195749382, + 2.0438673030416052, + 2.0518667734879767, + 2.059897552976441, + 2.067959764047124, + 2.0760535297197595, + 2.0841789734955687, + 2.092336219359141, + 2.1005253917803293, + 2.1087466157161465, + 2.117000016612675, + 2.1252857204069766, + 2.13360385352902, + 2.1419545429036044, + 2.1503379159523, + 2.1587541005953903, + 2.1672032252538247, + 2.175685418851178, + 2.184200810815618, + 2.1927495310818794, + 2.2013317100932475, + 2.2099474788035476, + 2.218596968679145, + 2.2272803117009485, + 2.2359976403664263, + 2.244749087691627, + 2.2535347872132085, + 2.262354872990478, + 2.2712094796074336, + 2.280098742174823, + 2.289022796332201, + 2.29798177825, + 2.3069758246316097, + 2.3160050727154613, + 2.325069660277121, + 2.334169725631395, + 2.3433054076344373, + 2.35247684568587, + 2.3616841797309096, + 2.3709275502625027, + 2.3802070983234693, + 2.389522965508657, + 2.398875293967098, + 2.408264226404181, + 2.417689906083828, + 2.4271524768306802, + 2.4366520830322917, + 2.4461888696413343, + 2.455762982177808, + 2.4653745667312625, + 2.475023769963025, + 2.4847107391084404, + 2.4944356219791146, + 2.504198566965173, + 2.5139997230375233, + 2.523839239750129, + 2.53371726724229, + 2.5436339562409365, + 2.553589458062927, + 2.563583924617356, + 2.5736175084078754, + 2.5836903625350174, + 2.593802640698535, + 2.6039544971997426, + 2.6141460869438746, + 2.6243775654424475, + 2.6346490888156313, + 2.6449608137946337, + 2.655312897724092, + 2.665705498564471, + 2.676138774894477, + 2.686612885913475, + 2.6971279914439186, + 2.70768425193379 }; +*/ +#endif diff --git a/src/3rd_party/fast_log/src/log.py b/src/3rd_party/fast_log/src/log.py new file mode 100644 index 00000000..9ef00486 --- /dev/null +++ b/src/3rd_party/fast_log/src/log.py @@ -0,0 +1,54 @@ +import numpy as np +from scipy.optimize import curve_fit +import matplotlib.pyplot as plt +from math import log + +# Generate the training data: +start = 0.5 +end = 1 +start_poly = start - 0.1 +end_poly = end + 0.1 +x = np.arange(start_poly, end_poly, 0.001) +y = np.log(np.arange(start_poly, end_poly, 0.001))/np.log(2) + +# The polynomial to fit: +def func1(x, a, b, c, d): + return a * x**3 + b*x**2 + c*x + d + +# The sollya expression: +# display = decimal; Q = fpminimax(log(x)/log(2), 3, [|D...|], [0.5, 1.0]); Q; +def func2(x, a, b, c, d): + return -3.10688310292421920877359298174269497394561767578125 + x * (5.8216322434128127127905827364884316921234130859375 + x * (-3.75924393052939986858973497874103486537933349609375 + x * 1.04449239329354615080092116841115057468414306640625)) + +params, _ = curve_fit(func1, x, y) +a, b, c, d = params[0], params[1], params[2], params[3] +yfit1 = func1(x, a, b, c, d) +yerr1 = yfit1 - y +yfit2 = func2(x, a, b, c, d) +yerr2 = yfit2 - y + +print("The polynomial parameters:", params) + +fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(5, 3)) +p0 = axes[0] +p1 = axes[1] +min_fit = min(min(yerr1), min(yfit2)) +max_fit = min(max(yfit1), max(yfit2)) +p0.plot(x, y, 'o', label="log") +p0.plot(x, yfit1, label="curve_fit", color="red") +p0.plot(x, yfit2, label="minimax", color="gold") +p0.vlines(x = start, ymin = min_fit, ymax = max_fit, colors = 'purple') +p0.vlines(x = end, ymin = min_fit, ymax = max_fit, colors = 'purple') +p0.legend(loc='best', fancybox=True) +p0.grid(True) + +min_err = min(min(yerr1), 
min(yerr2))
+max_err = min(max(yerr1), max(yerr2))
+p1.plot(x, yerr1, label="curve_fit error", color="red")
+p1.plot(x, yerr2, label="minimax error", color="gold")
+p1.legend(loc='best', fancybox=True)
+p1.vlines(x = start, ymin = min_err, ymax = max_err, colors = 'purple')
+p1.vlines(x = end, ymin = min_err, ymax = max_err, colors = 'purple')
+p1.grid(True)
+fig.tight_layout()
+plt.show()
diff --git a/src/3rd_party/fast_log/src/log.sollya b/src/3rd_party/fast_log/src/log.sollya
new file mode 100644
index 00000000..30d2a1e9
--- /dev/null
+++ b/src/3rd_party/fast_log/src/log.sollya
@@ -0,0 +1,3 @@
+display = decimal; Q = fpminimax(log(x)/log(2), 3, [|D...|], [0.40, 1.10]); Q;
+display = decimal; Q = fpminimax(log(x+1.0), 5, [|D...|], [-0.000000001, 0.0101]); Q;
+
diff --git a/src/3rd_party/fast_log/src/log_accurate.cc b/src/3rd_party/fast_log/src/log_accurate.cc
new file mode 100644
index 00000000..e5b475e7
--- /dev/null
+++ b/src/3rd_party/fast_log/src/log_accurate.cc
@@ -0,0 +1,294 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "util.h"
+
+/// @returns the exponent and a normalized mantissa with the relationship:
+/// [m * 2^E] = x, where m is in [1..2].
+/// This is similar to frexp(), except that the range for m is
+/// in [1..2] and not [0.5 ..1].
+std::pair<float, int> reduce_fp32(float x) {
+  uint32_t bits = bit_cast<uint32_t>(x);
+  if (bits == 0) {
+    return { 0., 0 };
+  }
+  // See:
+  // https://en.wikipedia.org/wiki/IEEE_754#Basic_and_interchange_formats
+
+  // Extract the 23-bit mantissa field.
+  uint64_t mantissa = bits & 0x7FFFFF;
+  bits >>= 23;
+
+  // Extract the 8-bit exponent field, and add the bias.
+  int exponent = int(bits & 0xff);
+  int normalized_exponent = exponent - 127;
+  bits >>= 8;
+
+  // Handle denormals.
+  if (exponent == 0) {
+    // Scale the number to a manageable scale.
+    auto r = reduce_fp32(x * 0x1p32);
+    r.second -= 32;
+    return r;
+  }
+
+  // Extract the sign bit.
+  uint64_t sign = bits;
+  bits >>= 1;
+
+  // Construct the normalized double;
+  uint64_t res = sign;
+  res <<= 11;
+  res |= 127;
+  res <<= 23;
+  res |= mantissa;
+
+  float frac = bit_cast(res);
+  return { frac, normalized_exponent };
+}
+
+// Compute the reciprocal of \p x in the range [sqrt(2)/2 .. sqrt(2)].
+double recip_of_masked(float x) { + uint64_t masked_recp_table[256] = { + 0x4000000000000000, 0x3fffc07f01fc07f0, 0x3fff81f81f81f820, 0x3fff44659e4a4271, + 0x3fff07c1f07c1f08, 0x3ffecc07b301ecc0, 0x3ffe9131abf0b767, 0x3ffe573ac901e574, + 0x3ffe1e1e1e1e1e1e, 0x3ffde5d6e3f8868a, 0x3ffdae6076b981db, 0x3ffd77b654b82c34, + 0x3ffd41d41d41d41d, 0x3ffd0cb58f6ec074, 0x3ffcd85689039b0b, 0x3ffca4b3055ee191, + 0x3ffc71c71c71c71c, 0x3ffc3f8f01c3f8f0, 0x3ffc0e070381c0e0, 0x3ffbdd2b899406f7, + 0x3ffbacf914c1bad0, 0x3ffb7d6c3dda338b, 0x3ffb4e81b4e81b4f, 0x3ffb2036406c80d9, + 0x3ffaf286bca1af28, 0x3ffac5701ac5701b, 0x3ffa98ef606a63be, 0x3ffa6d01a6d01a6d, + 0x3ffa41a41a41a41a, 0x3ffa16d3f97a4b02, 0x3ff9ec8e951033d9, 0x3ff9c2d14ee4a102, + 0x3ff999999999999a, 0x3ff970e4f80cb872, 0x3ff948b0fcd6e9e0, 0x3ff920fb49d0e229, + 0x3ff8f9c18f9c18fa, 0x3ff8d3018d3018d3, 0x3ff8acb90f6bf3aa, 0x3ff886e5f0abb04a, + 0x3ff8618618618618, 0x3ff83c977ab2bedd, 0x3ff8181818181818, 0x3ff7f405fd017f40, + 0x3ff7d05f417d05f4, 0x3ff7ad2208e0ecc3, 0x3ff78a4c8178a4c8, 0x3ff767dce434a9b1, + 0x3ff745d1745d1746, 0x3ff724287f46debc, 0x3ff702e05c0b8170, 0x3ff6e1f76b4337c7, + 0x3ff6c16c16c16c17, 0x3ff6a13cd1537290, 0x3ff6816816816817, 0x3ff661ec6a5122f9, + 0x3ff642c8590b2164, 0x3ff623fa77016240, 0x3ff6058160581606, 0x3ff5e75bb8d015e7, + 0x3ff5c9882b931057, 0x3ff5ac056b015ac0, 0x3ff58ed2308158ed, 0x3ff571ed3c506b3a, + 0x3ff5555555555555, 0x3ff5390948f40feb, 0x3ff51d07eae2f815, 0x3ff5015015015015, + 0x3ff4e5e0a72f0539, 0x3ff4cab88725af6e, 0x3ff4afd6a052bf5b, 0x3ff49539e3b2d067, + 0x3ff47ae147ae147b, 0x3ff460cbc7f5cf9a, 0x3ff446f86562d9fb, 0x3ff42d6625d51f87, + 0x3ff4141414141414, 0x3ff3fb013fb013fb, 0x3ff3e22cbce4a902, 0x3ff3c995a47babe7, + 0x3ff3b13b13b13b14, 0x3ff3991c2c187f63, 0x3ff3813813813814, 0x3ff3698df3de0748, + 0x3ff3521cfb2b78c1, 0x3ff33ae45b57bcb2, 0x3ff323e34a2b10bf, 0x3ff30d190130d190, + 0x3ff2f684bda12f68, 0x3ff2e025c04b8097, 0x3ff2c9fb4d812ca0, 0x3ff2b404ad012b40, + 0x3ff29e4129e4129e, 0x3ff288b01288b013, 0x3ff27350b8812735, 0x3ff25e22708092f1, + 0x3ff2492492492492, 0x3ff23456789abcdf, 0x3ff21fb78121fb78, 0x3ff20b470c67c0d9, + 0x3ff1f7047dc11f70, 0x3ff1e2ef3b3fb874, 0x3ff1cf06ada2811d, 0x3ff1bb4a4046ed29, + 0x3ff1a7b9611a7b96, 0x3ff19453808ca29c, 0x3ff1811811811812, 0x3ff16e0689427379, + 0x3ff15b1e5f75270d, 0x3ff1485f0e0acd3b, 0x3ff135c81135c811, 0x3ff12358e75d3033, + 0x3ff1111111111111, 0x3ff0fef010fef011, 0x3ff0ecf56be69c90, 0x3ff0db20a88f4696, + 0x3ff0c9714fbcda3b, 0x3ff0b7e6ec259dc8, 0x3ff0a6810a6810a7, 0x3ff0953f39010954, + 0x3ff0842108421084, 0x3ff073260a47f7c6, 0x3ff0624dd2f1a9fc, 0x3ff05197f7d73404, + 0x3ff0410410410410, 0x3ff03091b51f5e1a, 0x3ff0204081020408, 0x3ff0101010101010, + 0x3ff0000000000000, 0x3fefc07f01fc07f0, 0x3fef81f81f81f820, 0x3fef44659e4a4271, + 0x3fef07c1f07c1f08, 0x3feecc07b301ecc0, 0x3fee9131abf0b767, 0x3fee573ac901e574, + 0x3fee1e1e1e1e1e1e, 0x3fede5d6e3f8868a, 0x3fedae6076b981db, 0x3fed77b654b82c34, + 0x3fed41d41d41d41d, 0x3fed0cb58f6ec074, 0x3fecd85689039b0b, 0x3feca4b3055ee191, + 0x3fec71c71c71c71c, 0x3fec3f8f01c3f8f0, 0x3fec0e070381c0e0, 0x3febdd2b899406f7, + 0x3febacf914c1bad0, 0x3feb7d6c3dda338b, 0x3feb4e81b4e81b4f, 0x3feb2036406c80d9, + 0x3feaf286bca1af28, 0x3feac5701ac5701b, 0x3fea98ef606a63be, 0x3fea6d01a6d01a6d, + 0x3fea41a41a41a41a, 0x3fea16d3f97a4b02, 0x3fe9ec8e951033d9, 0x3fe9c2d14ee4a102, + 0x3fe999999999999a, 0x3fe970e4f80cb872, 0x3fe948b0fcd6e9e0, 0x3fe920fb49d0e229, + 0x3fe8f9c18f9c18fa, 0x3fe8d3018d3018d3, 0x3fe8acb90f6bf3aa, 0x3fe886e5f0abb04a, + 0x3fe8618618618618, 
0x3fe83c977ab2bedd, 0x3fe8181818181818, 0x3fe7f405fd017f40, + 0x3fe7d05f417d05f4, 0x3fe7ad2208e0ecc3, 0x3fe78a4c8178a4c8, 0x3fe767dce434a9b1, + 0x3fe745d1745d1746, 0x3fe724287f46debc, 0x3fe702e05c0b8170, 0x3fe6e1f76b4337c7, + 0x3fe6c16c16c16c17, 0x3fe6a13cd1537290, 0x3fe6816816816817, 0x3fe661ec6a5122f9, + 0x3fe642c8590b2164, 0x3fe623fa77016240, 0x3fe6058160581606, 0x3fe5e75bb8d015e7, + 0x3fe5c9882b931057, 0x3fe5ac056b015ac0, 0x3fe58ed2308158ed, 0x3fe571ed3c506b3a, + 0x3fe5555555555555, 0x3fe5390948f40feb, 0x3fe51d07eae2f815, 0x3fe5015015015015, + 0x3fe4e5e0a72f0539, 0x3fe4cab88725af6e, 0x3fe4afd6a052bf5b, 0x3fe49539e3b2d067, + 0x3fe47ae147ae147b, 0x3fe460cbc7f5cf9a, 0x3fe446f86562d9fb, 0x3fe42d6625d51f87, + 0x3fe4141414141414, 0x3fe3fb013fb013fb, 0x3fe3e22cbce4a902, 0x3fe3c995a47babe7, + 0x3fe3b13b13b13b14, 0x3fe3991c2c187f63, 0x3fe3813813813814, 0x3fe3698df3de0748, + 0x3fe3521cfb2b78c1, 0x3fe33ae45b57bcb2, 0x3fe323e34a2b10bf, 0x3fe30d190130d190, + 0x3fe2f684bda12f68, 0x3fe2e025c04b8097, 0x3fe2c9fb4d812ca0, 0x3fe2b404ad012b40, + 0x3fe29e4129e4129e, 0x3fe288b01288b013, 0x3fe27350b8812735, 0x3fe25e22708092f1, + 0x3fe2492492492492, 0x3fe23456789abcdf, 0x3fe21fb78121fb78, 0x3fe20b470c67c0d9, + 0x3fe1f7047dc11f70, 0x3fe1e2ef3b3fb874, 0x3fe1cf06ada2811d, 0x3fe1bb4a4046ed29, + 0x3fe1a7b9611a7b96, 0x3fe19453808ca29c, 0x3fe1811811811812, 0x3fe16e0689427379, + 0x3fe15b1e5f75270d, 0x3fe1485f0e0acd3b, 0x3fe135c81135c811, 0x3fe12358e75d3033, + 0x3fe1111111111111, 0x3fe0fef010fef011, 0x3fe0ecf56be69c90, 0x3fe0db20a88f4696, + 0x3fe0c9714fbcda3b, 0x3fe0b7e6ec259dc8, 0x3fe0a6810a6810a7, 0x3fe0953f39010954, + 0x3fe0842108421084, 0x3fe073260a47f7c6, 0x3fe0624dd2f1a9fc, 0x3fe05197f7d73404, + 0x3fe0410410410410, 0x3fe03091b51f5e1a, 0x3fe0204081020408, 0x3fe0101010101010, + }; + + unsigned xb = bit_cast(x); + uint64_t bval = masked_recp_table[(xb >> 16) & 0xff]; + return bit_cast(bval); +} + +// Compute the reciprocal log of \p x in the range [sqrt(2)/2 .. sqrt(2)]. 
+double log_recp_of_masked(float x) { + uint64_t masked_log_recp_table[256] = { + 0x3fe62e42fefa39ef, 0x3fe5ee82aa241920, 0x3fe5af405c3649e0, + 0x3fe5707a26bb8c66, 0x3fe5322e26867857, 0x3fe4f45a835a4e19, + 0x3fe4b6fd6f970c1f, 0x3fe47a1527e8a2d4, 0x3fe43d9ff2f923c5, + 0x3fe4019c2125ca93, 0x3fe3c6080c36bfb5, 0x3fe38ae2171976e8, + 0x3fe35028ad9d8c85, 0x3fe315da4434068b, 0x3fe2dbf557b0df43, + 0x3fe2a2786d0ec107, 0x3fe269621134db92, 0x3fe230b0d8bebc98, + 0x3fe1f8635fc61658, 0x3fe1c07849ae6007, 0x3fe188ee40f23ca7, + 0x3fe151c3f6f29612, 0x3fe11af823c75aa8, 0x3fe0e4898611cce1, + 0x3fe0ae76e2d054fa, 0x3fe078bf0533c568, 0x3fe04360be7603ae, + 0x3fe00e5ae5b207ab, 0x3fdfb358af7a4884, 0x3fdf4aa7ee03192e, + 0x3fdee2a156b413e5, 0x3fde7b42c3ddad74, 0x3fde148a1a2726cf, + 0x3fddae75484c9615, 0x3fdd490246defa6a, 0x3fdce42f18064744, + 0x3fdc7ff9c74554ca, 0x3fdc1c60693fa39e, 0x3fdbb9611b80e2fc, + 0x3fdb56fa0446290a, 0x3fdaf5295248cdcf, 0x3fda93ed3c8ad9e3, + 0x3fda33440224fa79, 0x3fd9d32bea15ed3a, 0x3fd973a3431356ae, + 0x3fd914a8635bf689, 0x3fd8b639a88b2df4, 0x3fd85855776dcbfb, + 0x3fd7fafa3bd8151c, 0x3fd79e26687cfb3e, 0x3fd741d876c67bb1, + 0x3fd6e60ee6af1973, 0x3fd68ac83e9c6a15, 0x3fd630030b3aac48, + 0x3fd5d5bddf595f31, 0x3fd57bf753c8d1fb, 0x3fd522ae0738a3d7, + 0x3fd4c9e09e172c3d, 0x3fd4718dc271c41c, 0x3fd419b423d5e8c6, + 0x3fd3c25277333183, 0x3fd36b6776be1116, 0x3fd314f1e1d35ce3, + 0x3fd2bef07cdc9355, 0x3fd269621134db91, 0x3fd214456d0eb8d5, + 0x3fd1bf99635a6b95, 0x3fd16b5ccbacfb73, 0x3fd1178e8227e47a, + 0x3fd0c42d676162e2, 0x3fd07138604d5864, 0x3fd01eae5626c691, + 0x3fcf991c6cb3b37a, 0x3fcef5ade4dcffe5, 0x3fce530effe71013, + 0x3fcdb13db0d48941, 0x3fcd1037f2655e7b, 0x3fcc6ffbc6f00f71, + 0x3fcbd087383bd8aa, 0x3fcb31d8575bce3b, 0x3fca93ed3c8ad9e5, + 0x3fc9f6c407089663, 0x3fc95a5adcf70182, 0x3fc8beafeb38fe8f, + 0x3fc823c16551a3c0, 0x3fc7898d85444c74, 0x3fc6f0128b756ab9, + 0x3fc6574ebe8c1339, 0x3fc5bf406b543db0, 0x3fc527e5e4a1b58d, + 0x3fc4913d8333b563, 0x3fc3fb45a59928ca, 0x3fc365fcb0159014, + 0x3fc2d1610c86813d, 0x3fc23d712a49c201, 0x3fc1aa2b7e23f729, + 0x3fc1178e8227e47a, 0x3fc08598b59e3a07, 0x3fbfe89139dbd565, + 0x3fbec739830a1126, 0x3fbda7276384469e, 0x3fbc885801bc4b20, + 0x3fbb6ac88dad5b1d, 0x3fba4e7640b1bc38, 0x3fb9335e5d594988, + 0x3fb8197e2f40e3f0, 0x3fb700d30aeac0e8, 0x3fb5e95a4d9791cd, + 0x3fb4d3115d207eac, 0x3fb3bdf5a7d1ee5e, 0x3fb2aa04a44717a1, + 0x3fb1973bd1465561, 0x3fb08598b59e3a06, 0x3faeea31c006b87c, + 0x3faccb73cdddb2d0, 0x3faaaef2d0fb1108, 0x3fa894aa149fb34b, + 0x3fa67c94f2d4bb65, 0x3fa466aed42de3f9, 0x3fa252f32f8d1840, + 0x3fa0415d89e74440, 0x3f9c63d2ec14aad7, 0x3f98492528c8cac5, + 0x3f9432a925980cbc, 0x3f90205658935837, 0x3f882448a388a283, + 0x3f8010157588de69, 0x3f70080559588b25, 0x0, + 0xbf7fe02a6b106799, 0xbf8fc0a8b0fc03c4, 0xbf97b91b07d5b126, + 0xbf9f829b0e7832f8, 0xbfa39e87b9febd68, 0xbfa77458f632dcff, + 0xbfab42dd711971b9, 0xbfaf0a30c01162a8, 0xbfb16536eea37ae3, + 0xbfb341d7961bd1d0, 0xbfb51b073f06183c, 0xbfb6f0d28ae56b4e, + 0xbfb8c345d6319b23, 0xbfba926d3a4ad562, 0xbfbc5e548f5bc743, + 0xbfbe27076e2af2ea, 0xbfbfec9131dbeabc, 0xbfc0d77e7cd08e5b, + 0xbfc1b72ad52f67a2, 0xbfc29552f81ff521, 0xbfc371fc201e8f75, + 0xbfc44d2b6ccb7d1c, 0xbfc526e5e3a1b438, 0xbfc5ff3070a793d6, + 0xbfc6d60fe719d21b, 0xbfc7ab890210d907, 0xbfc87fa06520c911, + 0xbfc9525a9cf456b6, 0xbfca23bc1fe2b561, 0xbfcaf3c94e80bff3, + 0xbfcbc286742d8cd4, 0xbfcc8ff7c79a9a20, 0xbfcd5c216b4fbb94, + 0xbfce27076e2af2e8, 0xbfcef0adcbdc5935, 0xbfcfb9186d5e3e29, + 0xbfd0402594b4d041, 0xbfd0a324e27390e2, 0xbfd1058bf9ae4ad4, + 
0xbfd1675cababa60f, 0xbfd1c898c16999fb, 0xbfd22941fbcf7966, + 0xbfd2895a13de86a4, 0xbfd2e8e2bae11d31, 0xbfd347dd9a987d56, + 0xbfd3a64c556945ea, 0xbfd404308686a7e4, 0xbfd4618bc21c5ec2, + 0xbfd4be5f957778a1, 0xbfd51aad872df82e, 0xbfd5767717455a6c, + 0xbfd5d1bdbf5809ca, 0xbfd62c82f2b9c796, 0xbfd686c81e9b14ad, + 0xbfd6e08eaa2ba1e4, 0xbfd739d7f6bbd007, 0xbfd792a55fdd47a1, + 0xbfd7eaf83b82afc2, 0xbfd842d1da1e8b18, 0xbfd89a3386c1425b, + 0xbfd8f11e873662c8, 0xbfd947941c2116fb, 0xbfd99d958117e08a, + 0xbfd9f323ecbf984d, 0xbfda484090e5bb09, 0xbfda9cec9a9a084a, + 0xbfdaf1293247786b, 0xbfdb44f77bcc8f64, 0xbfdb9858969310fd, + 0xbfdbeb4d9da71b7a, 0xbfdc3dd7a7cdad4d, 0xbfdc8ff7c79a9a21, + 0xbfdce1af0b85f3ec, 0xbfdd32fe7e00ebd5, 0xbfdd83e7258a2f3e, + 0xbfddd46a04c1c4a1, 0xbfde24881a7c6c26, 0xbfde744261d68789, + 0xbfdec399d2468cc1, 0xbfdf128f5faf06ec, 0xbfdf6123fa7028ad, + 0xbfdfaf588f78f31d, 0xbfdffd2e0857f497, 0xbfe02552a5a5d0ff, + 0xbfe04bdf9da926d2, 0xbfe0723e5c1cdf41, 0xbfe0986f4f573521, + 0xbfe0be72e4252a83, 0xbfe0e44985d1cc8c, 0xbfe109f39e2d4c96, + 0xbfe12f719593efbd, 0xbfe154c3d2f4d5ea, 0xbfe179eabbd899a0, + 0xbfe19ee6b467c96f, 0xbfe1c3b81f713c25, 0xbfe1e85f5e7040d1, + 0xbfe20cdcd192ab6e, 0xbfe23130d7bebf43, 0xbfe2555bce98f7ca, + 0xbfe2795e1289b11b, 0xbfe29d37fec2b08b, 0xbfe2c0e9ed448e8c, + 0xbfe2e47436e40268, 0xbfe307d7334f10be, 0xbfe32b1339121d71, + 0xbfe34e289d9ce1d2, 0xbfe37117b54747b6, 0xbfe393e0d3562a1a, + 0xbfe3b68449fffc23, 0xbfe3d9026a7156fb, 0xbfe3fb5b84d16f43, + 0xbfe41d8fe84672af, 0xbfe43f9fe2f9ce67, 0xbfe4618bc21c5ec2, + 0xbfe48353d1ea88df, 0xbfe4a4f85db03ebb, 0xbfe4c679afccee39, + 0xbfe4e7d811b75bb0, 0xbfe50913cc01686b, 0xbfe52a2d265bc5ab, + 0xbfe54b2467999498, 0xbfe56bf9d5b3f399, 0xbfe58cadb5cd7989, + 0xbfe5ad404c359f2d, 0xbfe5cdb1dc6c1765, 0xbfe5ee02a9241676, + 0xbfe60e32f44788d9, + }; + + unsigned xb = bit_cast(x); + uint64_t bval = masked_log_recp_table[(xb >> 16) & 0xff]; + return bit_cast(bval); +} + +/// Evaluate a polynomial that approximates log(x+1) in the range [0-0.01]. +double approximate_log_pol_1_to_1001(double x) { + return -8.0159120687014415322143784594351322561698644901218e-21 + + x * (0.99999999999996291855097751977154985070228576660156 + + x * (-0.49999999982958548416789312796026933938264846801758 + + x * (0.33333320027787793904394675337243825197219848632812 + + x * (-0.249963278288175078101218673509720247238874435424805 + + x * 0.195840954071922368484592880122363567352294921875)))); +} + +/// @return True if \p x is a NAN. +bool is_nan(float x) { + unsigned xb = bit_cast(x); + xb >>= 23; + return (xb & 0xff) == 0xff; +} + +// Handbook of Floating-Point Arithmetic -- Jean-Michel Muller +// Chapter 11. Evaluating Floating-Point Elementary Functions (pg. 387) +float __attribute__((noinline)) my_log(float x) { + // Handle the special values: + if (x == 0) { + return bit_cast(0xff800000); // -Inf + } else if (x < 0) { + return bit_cast(0xffc00000); // -Nan. + } else if (is_nan(x)) { + return x; + } + + /// Extract the fraction, and the power-of-two exponent, such that: + // (2^E) * m = x; + auto a = reduce_fp32(x); + float m = a.first; + int E = a.second; + + // Reduce the range of m to [sqrt(2)/2 -- sqrt(2)] + if (m > 1.4142136) { + E = E + 1; + m = m / 2; + } + + // Compute the reciprocal of m using a lookup table. + double ri = recip_of_masked(m); + double z = m * ri - 1; + double log2 = bit_cast(0x3fe62e42fefa39ef); + + // We use double here because float is not accurate enough for the final + // reduction. We are missing just a few bits. 
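// Annotation (added for clarity; not part of the vendored source): the two table
// lookups (ri above, ln_ri below) return ri ~= 1/m_hat and ln(1/m_hat), where
// m_hat is m truncated to the 8-bit table index. Since z = m*ri - 1, we have
// m = m_hat*(1 + z), so
//   ln(x) = E*ln(2) + ln(m) = E*ln(2) + ln(1 + z) - ln(1/m_hat),
// which is exactly the expression (E * log2 + ln_1z) - ln_ri returned below.
// z stays essentially within [0, 0.01], the range the ln(1 + z) polynomial is
// fitted for, so the polynomial step loses almost no precision.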
+ + // Compute log(1/ri) using a lookup table. + double ln_ri = log_recp_of_masked(m); + // Approximate log(1+z) using a polynomial: + double ln_1z = approximate_log_pol_1_to_1001(z); + + // Perform the final reduction. + return (E * log2 + ln_1z) - ln_ri; +} + +// Wrap the standard log(double) and use it as the ground truth. +float accurate_log(float x) { return log((double)x); } + +// Wrap the standard log(double) and use it as the ground truth. +float libc_log(float x) { return logf(x); } + +int main(int argc, char **argv) { + //print_recp_table_for_3f_values(); + //print_log_recp_table_for_3f_values(); + print_ulp_deltas(my_log, accurate_log); +} diff --git a/src/3rd_party/fast_log/src/log_approx.cc b/src/3rd_party/fast_log/src/log_approx.cc new file mode 100644 index 00000000..6b481372 --- /dev/null +++ b/src/3rd_party/fast_log/src/log_approx.cc @@ -0,0 +1,136 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +double __attribute__((noinline)) nop(double x) { return 0.00001; } + +/// @returns the exponent and a normalized mantissa with the relationship: +/// [a * 2^b] = x +std::pair my_frexp(double x) { + uint64_t bits = bit_cast(x); + if (bits == 0) { + return { 0., 0 }; + } + // See: + // https://en.wikipedia.org/wiki/IEEE_754#Basic_and_interchange_formats + + // Extract the 52-bit mantissa field. + uint64_t mantissa = bits & 0xFFFFFFFFFFFFF; + bits >>= 52; + + // Extract the 11-bit exponent field, and add the bias. + int exponent = int(bits & 0x7ff) - 1023; + bits >>= 11; + + // Extract the sign bit. + uint64_t sign = bits; + bits >>= 1; + + // Construct the normalized double; + uint64_t res = sign; + res <<= 11; + res |= 1023 - 1; + res <<= 52; + res |= mantissa; + + double frac = bit_cast(res); + return { frac, exponent + 1 }; +} + +double __attribute__((noinline)) fastlog2(double x) { + + /// Extract the fraction, and the power-of-two exponent. + + auto a = my_frexp(x); + x = a.first; + int pow2 = a.second; + + // Use a 4-part polynom to approximate log2(x); + double c[] = { 1.33755322, -4.42852392, 6.30371424, -3.21430967 }; + double log2 = 0.6931471805599453; + + // Use Horner's method to evaluate the polynomial. + double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0]))); + + // Compute log2(x), and convert the result to base-e. + return log2 * (pow2 + val); +} + +// Find the max error. +void validate_error(const std::vector &iv, double max_range = 20.0, + int iterations = 10000) { + double max_error = 0; + double error_val = 0; + unsigned validated = 0; + // Validate a sequence of numbers. + for (int i = 1; i < iterations; i++) { + validated++; + double val = ((max_range * i) / iterations); + double err = std::abs(log(val) - fastlog2(val)); + if (err > max_error) { + error_val = val; + max_error = err; + } + } + + // Validate the pre-computed random numbers. + for (auto elem : iv) { + validated++; + double err = std::abs(log(elem) - fastlog2(elem)); + if (err > max_error) { + error_val = elem; + max_error = err; + } + } + + std::cout << "Tested " << validated << " values [0.." << max_range << "]\n"; + std::cout << "Max error " << max_error << " at " << error_val << "\n"; + std::cout << "# " << log(error_val) << " vs " << fastlog2(error_val) << "\n"; +} + +// Check if the function is monolithic. 
+void validate_monotonic(double max_range = 20.0, int iterations = 10000) { + double prev = fastlog2(0); + unsigned non_monotonic = 0; + for (int i = 1; i < iterations; i++) { + double val = ((max_range * i) / iterations); + val = fastlog2(val); + if (prev > val) { + non_monotonic += 1; + } + prev = val; + } + + std::cout << "Tested " << iterations << " values [0.." << max_range << "]\n"; + std::cout << "Found " << non_monotonic << " non-monotinic values\n"; +} + +void check() { + auto a = my_frexp(4.5); + auto b = my_frexp(3.2); + auto c = my_frexp(-10); + auto d = my_frexp(65536); + assert(a.first == 0.5625 && a.second == 3); + assert(b.first == 0.8 && b.second == 2); + assert(c.first == -0.625 && c.second == 4); + assert(d.first == 0.5 && d.second == 17); +} + +int main(int argc, char **argv) { + check(); + std::vector iv = generate_test_vector(0.5, 10., 10000); + validate_error(iv); + validate_monotonic(); + + bench("fast_log", fastlog2, iv); + bench("libm_log", log, iv); + bench("nop ", nop, iv); + return 0; +} diff --git a/src/3rd_party/fast_log/src/util.h b/src/3rd_party/fast_log/src/util.h new file mode 100644 index 00000000..8ae43ba5 --- /dev/null +++ b/src/3rd_party/fast_log/src/util.h @@ -0,0 +1,204 @@ +#include +#include +#include +#include +#include +#include + +using std::chrono::duration; +using std::chrono::duration_cast; +using std::chrono::high_resolution_clock; +using std::chrono::milliseconds; + +#define PRINT_DOUBLE(name, x) \ + { \ + uint64_t ux = bit_cast(x); \ + printf("%s: (%.9f 0x%lx)\n", #name, x, ux); \ + } +#define PRINT_FLOAT(name, x) \ + { \ + uint32_t ux = bit_cast(x); \ + printf("%s: (%.9f 0x%lx)\n", #name, x, ux); \ + } + +#define PRINT_INT(name, x) \ + { printf("%s: (0x%lx %ld)\n", #name, x, x); } + +template To bit_cast(const From &src) noexcept { + static_assert(sizeof(To) == sizeof(From), "Size mismatch"); + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; +} + +// Return the bitwise distance between the two doubles. +// Notice that a change in sign will return a high ULP difference, +// which is desirable. +template UnsignedTy ulp_difference(FloatTy n1, FloatTy n2) { + UnsignedTy b1 = bit_cast(n1); + UnsignedTy b2 = bit_cast(n2); + if (b1 == b2 || (std::isnan(n1) && std::isnan(n2))) { + return 0; + } + + // Return the delta between the two numbers in bits. + return (b1 > b2) ? (b1 - b2) : (b2 - b1); +} + +/// A generic histogram class. +template struct Histogram { + uint64_t payload_[NumBins]; + Histogram() { + for (unsigned i = 0; i < NumBins; i++) { + payload_[i] = 0; + } + } + // Add the counts + void join(const Histogram &other) { + for (unsigned i = 0; i < NumBins; i++) { + payload_[i] += other.payload_[i]; + } + } + void add(unsigned idx, uint64_t val = 1) { + idx = std::min(NumBins - 1, idx); + payload_[idx] += val; + } + + void dump(const char *message) { + printf("%s", message); + for (unsigned i = 0; i < NumBins; i++) { + double percent = 100 * double(payload_[i]) / double(1LL << 32); + if (i < (NumBins - 1)) { + printf("%02d) %02.3f%% - %08lu\n", i, percent, payload_[i]); + } else { + printf("Other: %02.3f%% - %08lu\n", percent, payload_[i]); + } + } + }; +}; + +/// A helper class that performs multi-threaded computation of ULP differences +/// between two implementations. 
+template +class Verifier { + std::thread threads_[NumThreads]; + Histogram hist_[NumThreads]; + + public: + void print_ulp_deltas(FloatTy (*handle1)(FloatTy), FloatTy (*handle2)(FloatTy)) { + auto scan = [&handle1, &handle2](uint64_t start, uint64_t end, Histogram &hist) { + // For each value in the 32bit range. + for (uint64_t i = start; i < end; i++) { + FloatTy val = bit_cast((unsigned)i); + FloatTy r1 = handle1(val); + FloatTy r2 = handle2(val); + // Record the ULP delta. + unsigned ud = ulp_difference(r1, r2); + hist.add(ud); + } + }; + + uint64_t chunk_size = (1L << 32) / NumThreads; + for (unsigned i = 0; i < NumThreads; i++) { + uint64_t start = i * chunk_size; + uint64_t end = (i + 1) * chunk_size; + threads_[i] = std::thread(scan, start, end, std::ref(hist_[i])); + } + for (unsigned i = 0; i < NumThreads; i++) { + threads_[i].join(); + } + // Merge the histograms after the workers finished. + for (unsigned i = 1; i < NumThreads; i++) { + hist_[0].join(hist_[i]); + } + // Report the histogram. + hist_[0].dump("\nULP delta:\n"); + } +}; + +// Compare two functions and count the number of values with different ULPs. +// See https://en.wikipedia.org/wiki/IEEE_754#Basic_and_interchange_formats +void print_ulp_deltas(float (*handle1)(float), float (*handle2)(float)) { + Verifier verifier; + verifier.print_ulp_deltas(handle1, handle2); +} + +// Prints a lookup table for [0x3fxx0000], that computes f(x)=log(1/x). +void print_log_recp_table_for_3f_values() { + uint64_t table[256] = { 0 }; + + for (unsigned i = 0; i < 256; i++) { + unsigned valb = (0x3f << 24) | (i << 16); + float val = bit_cast(valb); + double val2 = log(1. / (double)val); + table[i] = bit_cast(val2); + } + + // Print the ULP distribution: + printf("uint64_t masked_log_recp_table[256] = {"); + for (int i = 0; i < 256; i++) { + if (i % 8 == 0) { + printf("\n\t"); + } + printf("0x%lx, ", table[i]); + } + printf("};\n"); +} + +// Prints a lookup table for [0x3fxx0000], that computes f(x)=1/x. +void print_recp_table_for_3f_values() { + uint64_t table[256] = { 0 }; + + for (unsigned i = 0; i < 256; i++) { + unsigned valb = (0x3f << 24) | (i << 16); + float val = bit_cast(valb); + double val2 = (1. / (double)val); + table[i] = bit_cast(val2); + } + + // Print the ULP distribution: + printf("uint64_t masked_recp_table[256] = {"); + for (int i = 0; i < 256; i++) { + if (i % 8 == 0) { + printf("\n\t"); + } + printf("0x%lx, ", table[i]); + } + printf("};\n"); +} + +/// @return \p count random uniform numbers in the range \p start to \p end. +template +std::vector generate_test_vector(FloatTy start, FloatTy end, unsigned count) { + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_real_distribution dist(start, end); + std::vector res; + for (unsigned i = 0; i < count; i++) { + res.push_back(dist(mt)); + } + return res; +} + +/// @brief Benchmark a program with the name \p name, and function pointer +/// \p handle. Run \p iterations iterations on inputs from the test vector +/// \p iv. Prints the result to stdout. 
+template +void bench(const std::string &name, FloatTy (*handle)(FloatTy), const std::vector &iv, + int iterations = 10000) { + auto t1 = high_resolution_clock::now(); + + FloatTy sum = 0; + for (int iter = 0; iter < iterations; iter++) { + for (auto elem : iv) { + sum += handle(elem); + } + } + + auto t2 = high_resolution_clock::now(); + auto ms_int = duration_cast(t2 - t1); + std::cout << "name = " << name << ", "; + std::cout << "sum = " << sum << ", "; + std::cout << "time = " << ms_int.count() << "ms\n"; +} diff --git a/src/3rd_party/murmurhash3/MurmurHash3.cpp b/src/3rd_party/murmurhash3/MurmurHash3.cpp new file mode 100644 index 00000000..fa0deb83 --- /dev/null +++ b/src/3rd_party/murmurhash3/MurmurHash3.cpp @@ -0,0 +1,340 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include + +#define ROTL32(x,y) _rotl(x,y) +#define ROTL64(x,y) _rotl64(x,y) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32 ( uint32_t x, int8_t r ) +{ + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i ) +{ + uint8_t *bytes = (uint8_t *)p; + uint32_t retval = (bytes[4 * i + 0] << 24) | (bytes[4 * i + 1] << 16) | (bytes[4 * i + 2] << 8) | (bytes[4 * i + 3]); + return retval; +} + +FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) +{ + uint8_t *bytes = (uint8_t *)p; + uint64_t retval = (static_cast(bytes[8 * i + 0]) << 56) | (static_cast(bytes[8 * i + 1]) << 48) | (static_cast(bytes[8 * i + 2]) << 40) | (static_cast(bytes[8 * i + 3]) << 32) + | (static_cast(bytes[8 * i + 4]) << 24) | (static_cast(bytes[8 * i + 5]) << 16) | (static_cast(bytes[8 * i + 6]) << 8) | static_cast(bytes[8 * i + 7]); + return retval; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32 ( uint32_t h ) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + 
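A minimal usage sketch for the three hash variants defined below and declared in MurmurHash3.h follows; the key string, seed value, and output handling are illustrative only and not part of the vendored sources. Note that the tail-handling switch statements in these functions rely on intentional case fallthrough.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include "MurmurHash3.h"

int main() {
    const char *key = "amalgam";
    int len = (int)std::strlen(key);
    uint32_t h32 = 0;
    uint64_t h128[2] = { 0, 0 };
    MurmurHash3_x86_32(key, len, /*seed=*/42, &h32);   // 32-bit hash
    MurmurHash3_x64_128(key, len, /*seed=*/42, h128);  // 128-bit hash
    std::printf("x86_32:  %08x\n", h32);
    std::printf("x64_128: %016llx%016llx\n",
                (unsigned long long)h128[0], (unsigned long long)h128[1]);
    return 0;
}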
+void MurmurHash3_x86_32 ( const void * key, int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i); + + k1 *= c1; + k1 = ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1,13); + h1 = h1*5+0xe6546b64; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128 ( const void * key, const int len, + uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); + + for(int i = -nblocks; i; i++) + { + uint32_t k1 = getblock32(blocks,i*4+0); + uint32_t k2 = getblock32(blocks,i*4+1); + uint32_t k3 = getblock32(blocks,i*4+2); + uint32_t k4 = getblock32(blocks,i*4+3); + + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + + h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch(len & 15) + { + case 15: k4 ^= tail[14] << 16; + case 14: k4 ^= tail[13] << 8; + case 13: k4 ^= tail[12] << 0; + k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; + + case 12: k3 ^= tail[11] << 24; + case 11: k3 ^= tail[10] << 16; + case 10: k3 ^= tail[ 9] << 8; + case 9: k3 ^= tail[ 8] << 0; + k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; + + case 8: k2 ^= tail[ 7] << 24; + case 7: k2 ^= tail[ 6] << 16; + case 6: k2 ^= tail[ 5] << 8; + case 5: k2 ^= tail[ 4] << 0; + k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; + + case 4: k1 ^= tail[ 3] << 24; + case 3: k1 ^= tail[ 2] << 16; + case 2: k1 ^= tail[ 1] << 8; + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void 
MurmurHash3_x64_128 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock64(blocks,i*2+0); + uint64_t k2 = getblock64(blocks,i*2+1); + + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; + case 14: k2 ^= ((uint64_t)tail[13]) << 40; + case 13: k2 ^= ((uint64_t)tail[12]) << 32; + case 12: k2 ^= ((uint64_t)tail[11]) << 24; + case 11: k2 ^= ((uint64_t)tail[10]) << 16; + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + diff --git a/src/3rd_party/murmurhash3/MurmurHash3.h b/src/3rd_party/murmurhash3/MurmurHash3.h new file mode 100644 index 00000000..e1c6d349 --- /dev/null +++ b/src/3rd_party/murmurhash3/MurmurHash3.h @@ -0,0 +1,37 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. 
+ +#ifndef _MURMURHASH3_H_ +#define _MURMURHASH3_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); + +void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); + +void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/src/3rd_party/rapidyaml/rapidyaml-0.5.0.hpp b/src/3rd_party/rapidyaml/rapidyaml-0.5.0.hpp new file mode 100644 index 00000000..e4b32755 --- /dev/null +++ b/src/3rd_party/rapidyaml/rapidyaml-0.5.0.hpp @@ -0,0 +1,33651 @@ +#ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ +#define _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ + +// +// Rapid YAML - a library to parse and emit YAML, and do it fast. +// +// https://github.com/biojppm/rapidyaml +// +// DO NOT EDIT. This file is generated automatically. +// This is an amalgamated single-header version of the library. +// +// INSTRUCTIONS: +// - Include at will in any header of your project +// - In one (and only one) of your project source files, +// #define RYML_SINGLE_HDR_DEFINE_NOW and then include this header. +// This will enable the function and class definitions in +// the header file. +// - To compile into a shared library, just define the +// preprocessor symbol RYML_SHARED . This will take +// care of symbol export/import. +// + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// LICENSE.txt +// https://github.com/biojppm/rapidyaml/LICENSE.txt +//-------------------------------------------------------------------------------- +//******************************************************************************** + +// Copyright (c) 2018, Joao Paulo Magalhaes +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
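Per the INSTRUCTIONS comment above, the amalgamated header can be included anywhere, with RYML_SINGLE_HDR_DEFINE_NOW defined in exactly one translation unit before the include. A minimal consumption sketch, assuming the ryml::parse_in_arena / NodeRef::val API of this 0.5.0 release (the YAML text and file name are illustrative):

// main.cpp -- the one translation unit that instantiates the definitions.
#define RYML_SINGLE_HDR_DEFINE_NOW
#include "rapidyaml-0.5.0.hpp"

#include <iostream>
#include <string>

int main() {
    ryml::Tree tree = ryml::parse_in_arena("{a: 1, b: two}");
    ryml::csubstr b = tree["b"].val();
    std::cout << std::string(b.str, b.len) << "\n";  // prints "two"
    return 0;
}

Every other file that uses the library includes the header without the define.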
+// + + // shared library: export when defining +#if defined(RYML_SHARED) && defined(RYML_SINGLE_HDR_DEFINE_NOW) && !defined(RYML_EXPORTS) +#define RYML_EXPORTS +#endif + + + // propagate defines to c4core +#if defined(RYML_SINGLE_HDR_DEFINE_NOW) && !defined(C4CORE_SINGLE_HDR_DEFINE_NOW) +#define C4CORE_SINGLE_HDR_DEFINE_NOW +#endif + +#if defined(RYML_EXPORTS) && !defined(C4CORE_EXPORTS) +#define C4CORE_EXPORTS +#endif + +#if defined(RYML_SHARED) && !defined(C4CORE_SHARED) +#define C4CORE_SHARED +#endif + +// workaround for include removal while amalgamating +// resulting in missing in arm-none-eabi-g++ +// https://github.com/biojppm/rapidyaml/issues/193 +#include + + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/c4core_all.hpp +// https://github.com/biojppm/rapidyaml/src/c4/c4core_all.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4CORE_SINGLE_HEADER_AMALGAMATED_HPP_ +#define _C4CORE_SINGLE_HEADER_AMALGAMATED_HPP_ + +// +// c4core - C++ utilities +// +// https://github.com/biojppm/c4core +// +// DO NOT EDIT. This file is generated automatically. +// This is an amalgamated single-header version of the library. +// +// INSTRUCTIONS: +// - Include at will in any header of your project +// - In one (and only one) of your project source files, +// #define C4CORE_SINGLE_HDR_DEFINE_NOW and then include this header. +// This will enable the function and class definitions in +// the header file. +// - To compile into a shared library, just define the +// preprocessor symbol C4CORE_SHARED . This will take +// care of symbol export/import. +// + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// LICENSE.txt +// https://github.com/biojppm/c4core/LICENSE.txt +//-------------------------------------------------------------------------------- +//******************************************************************************** + +// Copyright (c) 2018, Joao Paulo Magalhaes +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+// + +// shared library: export when defining +#if defined(C4CORE_SHARED) && defined(C4CORE_SINGLE_HDR_DEFINE_NOW) && !defined(C4CORE_EXPORTS) +#define C4CORE_EXPORTS +#endif + + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/export.hpp +// https://github.com/biojppm/c4core/src/c4/export.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_EXPORT_HPP_ +#define C4_EXPORT_HPP_ + +#ifdef _WIN32 + #ifdef C4CORE_SHARED + #ifdef C4CORE_EXPORTS + #define C4CORE_EXPORT __declspec(dllexport) + #else + #define C4CORE_EXPORT __declspec(dllimport) + #endif + #else + #define C4CORE_EXPORT + #endif +#else + #define C4CORE_EXPORT +#endif + +#endif /* C4CORE_EXPORT_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/export.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/preprocessor.hpp +// https://github.com/biojppm/c4core/src/c4/preprocessor.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_PREPROCESSOR_HPP_ +#define _C4_PREPROCESSOR_HPP_ + +/** @file preprocessor.hpp Contains basic macros and preprocessor utilities. + * @ingroup basic_headers */ + +#ifdef __clang__ + /* NOTE: using , ## __VA_ARGS__ to deal with zero-args calls to + * variadic macros is not portable, but works in clang, gcc, msvc, icc. + * clang requires switching off compiler warnings for pedantic mode. + * @see http://stackoverflow.com/questions/32047685/variadic-macro-without-arguments */ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" // warning: token pasting of ',' and __VA_ARGS__ is a GNU extension +#elif defined(__GNUC__) + /* GCC also issues a warning for zero-args calls to variadic macros. + * This warning is switched on with -pedantic and apparently there is no + * easy way to turn it off as with clang. But marking this as a system + * header works. + * @see https://gcc.gnu.org/onlinedocs/cpp/System-Headers.html + * @see http://stackoverflow.com/questions/35587137/ */ +# pragma GCC system_header +#endif + +#define C4_WIDEN(str) L"" str + +#define C4_COUNTOF(arr) (sizeof(arr)/sizeof((arr)[0])) + +#define C4_EXPAND(arg) arg + +/** useful in some macro calls with template arguments */ +#define C4_COMMA , +/** useful in some macro calls with template arguments + * @see C4_COMMA */ +#define C4_COMMA_X C4_COMMA + +/** expand and quote */ +#define C4_XQUOTE(arg) _C4_XQUOTE(arg) +#define _C4_XQUOTE(arg) C4_QUOTE(arg) +#define C4_QUOTE(arg) #arg + +/** expand and concatenate */ +#define C4_XCAT(arg1, arg2) _C4_XCAT(arg1, arg2) +#define _C4_XCAT(arg1, arg2) C4_CAT(arg1, arg2) +#define C4_CAT(arg1, arg2) arg1##arg2 + +#define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) + +/** A preprocessor foreach. Spectacular trick taken from: + * http://stackoverflow.com/a/1872506/5875572 + * The first argument is for a macro receiving a single argument, + * which will be called with every subsequent argument. There is + * currently a limit of 32 arguments, and at least 1 must be provided. 
+ * +Example: +@code{.cpp} +struct Example { + int a; + int b; + int c; +}; +// define a one-arg macro to be called +#define PRN_STRUCT_OFFSETS(field) PRN_STRUCT_OFFSETS_(Example, field) +#define PRN_STRUCT_OFFSETS_(structure, field) printf(C4_XQUOTE(structure) ":" C4_XQUOTE(field)" - offset=%zu\n", offsetof(structure, field)); + +// now call the macro for a, b and c +C4_FOR_EACH(PRN_STRUCT_OFFSETS, a, b, c); +@endcode */ +#define C4_FOR_EACH(what, ...) C4_FOR_EACH_SEP(what, ;, __VA_ARGS__) + +/** same as C4_FOR_EACH(), but use a custom separator between statements. + * If a comma is needed as the separator, use the C4_COMMA macro. + * @see C4_FOR_EACH + * @see C4_COMMA + */ +#define C4_FOR_EACH_SEP(what, sep, ...) _C4_FOR_EACH_(_C4_FOR_EACH_NARG(__VA_ARGS__), what, sep, __VA_ARGS__) + +/// @cond dev + +#define _C4_FOR_EACH_01(what, sep, x) what(x) sep +#define _C4_FOR_EACH_02(what, sep, x, ...) what(x) sep _C4_FOR_EACH_01(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_03(what, sep, x, ...) what(x) sep _C4_FOR_EACH_02(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_04(what, sep, x, ...) what(x) sep _C4_FOR_EACH_03(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_05(what, sep, x, ...) what(x) sep _C4_FOR_EACH_04(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_06(what, sep, x, ...) what(x) sep _C4_FOR_EACH_05(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_07(what, sep, x, ...) what(x) sep _C4_FOR_EACH_06(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_08(what, sep, x, ...) what(x) sep _C4_FOR_EACH_07(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_09(what, sep, x, ...) what(x) sep _C4_FOR_EACH_08(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_10(what, sep, x, ...) what(x) sep _C4_FOR_EACH_09(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_11(what, sep, x, ...) what(x) sep _C4_FOR_EACH_10(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_12(what, sep, x, ...) what(x) sep _C4_FOR_EACH_11(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_13(what, sep, x, ...) what(x) sep _C4_FOR_EACH_12(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_14(what, sep, x, ...) what(x) sep _C4_FOR_EACH_13(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_15(what, sep, x, ...) what(x) sep _C4_FOR_EACH_14(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_16(what, sep, x, ...) what(x) sep _C4_FOR_EACH_15(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_17(what, sep, x, ...) what(x) sep _C4_FOR_EACH_16(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_18(what, sep, x, ...) what(x) sep _C4_FOR_EACH_17(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_19(what, sep, x, ...) what(x) sep _C4_FOR_EACH_18(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_20(what, sep, x, ...) what(x) sep _C4_FOR_EACH_19(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_21(what, sep, x, ...) what(x) sep _C4_FOR_EACH_20(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_22(what, sep, x, ...) what(x) sep _C4_FOR_EACH_21(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_23(what, sep, x, ...) what(x) sep _C4_FOR_EACH_22(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_24(what, sep, x, ...) what(x) sep _C4_FOR_EACH_23(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_25(what, sep, x, ...) what(x) sep _C4_FOR_EACH_24(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_26(what, sep, x, ...) what(x) sep _C4_FOR_EACH_25(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_27(what, sep, x, ...) what(x) sep _C4_FOR_EACH_26(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_28(what, sep, x, ...) what(x) sep _C4_FOR_EACH_27(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_29(what, sep, x, ...) 
what(x) sep _C4_FOR_EACH_28(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_30(what, sep, x, ...) what(x) sep _C4_FOR_EACH_29(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_31(what, sep, x, ...) what(x) sep _C4_FOR_EACH_30(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_32(what, sep, x, ...) what(x) sep _C4_FOR_EACH_31(what, sep, __VA_ARGS__) +#define _C4_FOR_EACH_NARG(...) _C4_FOR_EACH_NARG_(__VA_ARGS__, _C4_FOR_EACH_RSEQ_N()) +#define _C4_FOR_EACH_NARG_(...) _C4_FOR_EACH_ARG_N(__VA_ARGS__) +#define _C4_FOR_EACH_ARG_N(_01, _02, _03, _04, _05, _06, _07, _08, _09, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, N, ...) N +#define _C4_FOR_EACH_RSEQ_N() 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 09, 08, 07, 06, 05, 04, 03, 02, 01 +#define _C4_FOR_EACH_(N, what, sep, ...) C4_XCAT(_C4_FOR_EACH_, N)(what, sep, __VA_ARGS__) + +/// @endcond + +#ifdef __clang__ +# pragma clang diagnostic pop +#endif + +#endif /* _C4_PREPROCESSOR_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/preprocessor.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/platform.hpp +// https://github.com/biojppm/c4core/src/c4/platform.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_PLATFORM_HPP_ +#define _C4_PLATFORM_HPP_ + +/** @file platform.hpp Provides platform information macros + * @ingroup basic_headers */ + +// see also https://sourceforge.net/p/predef/wiki/OperatingSystems/ + +#if defined(_WIN64) +# define C4_WIN +# define C4_WIN64 +#elif defined(_WIN32) +# define C4_WIN +# define C4_WIN32 +#elif defined(__ANDROID__) +# define C4_ANDROID +#elif defined(__APPLE__) +# include "TargetConditionals.h" +# if TARGET_OS_IPHONE || TARGET_IPHONE_SIMULATOR +# define C4_IOS +# elif TARGET_OS_MAC || TARGET_OS_OSX +# define C4_MACOS +# else +# error "Unknown Apple platform" +# endif +#elif defined(__linux__) || defined(__linux) +# define C4_UNIX +# define C4_LINUX +#elif defined(__unix__) || defined(__unix) +# define C4_UNIX +#elif defined(__arm__) || defined(__aarch64__) +# define C4_ARM +#elif defined(SWIG) +# define C4_SWIG +#else +# error "unknown platform" +#endif + +#if defined(__posix) || defined(C4_UNIX) || defined(C4_LINUX) +# define C4_POSIX +#endif + + +#endif /* _C4_PLATFORM_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/platform.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/cpu.hpp +// https://github.com/biojppm/c4core/src/c4/cpu.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_CPU_HPP_ +#define _C4_CPU_HPP_ + +/** @file cpu.hpp Provides processor information macros + * @ingroup basic_headers */ + +// see also https://sourceforge.net/p/predef/wiki/Architectures/ +// see also https://sourceforge.net/p/predef/wiki/Endianness/ +// see also https://github.com/googlesamples/android-ndk/blob/android-mk/hello-jni/jni/hello-jni.c +// see http://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qprocessordetection.h + +#ifdef 
__ORDER_LITTLE_ENDIAN__ + #define _C4EL __ORDER_LITTLE_ENDIAN__ +#else + #define _C4EL 1234 +#endif + +#ifdef __ORDER_BIG_ENDIAN__ + #define _C4EB __ORDER_BIG_ENDIAN__ +#else + #define _C4EB 4321 +#endif + +// mixed byte order (eg, PowerPC or ia64) +#define _C4EM 1111 + +#if defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64) + #define C4_CPU_X86_64 + #define C4_WORDSIZE 8 + #define C4_BYTE_ORDER _C4EL + +#elif defined(__i386) || defined(__i386__) || defined(_M_IX86) + #define C4_CPU_X86 + #define C4_WORDSIZE 4 + #define C4_BYTE_ORDER _C4EL + +#elif defined(__arm__) || defined(_M_ARM) \ + || defined(__TARGET_ARCH_ARM) || defined(__aarch64__) || defined(_M_ARM64) + #if defined(__aarch64__) || defined(_M_ARM64) + #define C4_CPU_ARM64 + #define C4_CPU_ARMV8 + #define C4_WORDSIZE 8 + #else + #define C4_CPU_ARM + #define C4_WORDSIZE 4 + #if defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) \ + || (defined(__ARCH_ARM) && __ARCH_ARM >= 8) + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 8) \ + #define C4_CPU_ARMV8 + #elif defined(__ARM_ARCH_7__) || defined(_ARM_ARCH_7) \ + || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) \ + || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) \ + || defined(__ARM_ARCH_7EM__) \ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 7) \ + || (defined(_M_ARM) && _M_ARM >= 7) + #define C4_CPU_ARMV7 + #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6M__) || defined(__ARM_ARCH_6KZ__) \ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 6) + #define C4_CPU_ARMV6 + #elif defined(__ARM_ARCH_5TEJ__) \ + || defined(__ARM_ARCH_5TE__) \ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 5) + #define C4_CPU_ARMV5 + #elif defined(__ARM_ARCH_4T__) \ + || (defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM >= 4) + #define C4_CPU_ARMV4 + #else + #error "unknown CPU architecture: ARM" + #endif + #endif + #if defined(__ARMEL__) || defined(__LITTLE_ENDIAN__) || defined(__AARCH64EL__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) \ + || defined(_MSC_VER) // winarm64 does not provide any of the above macros, + // but advises little-endianess: + // https://docs.microsoft.com/en-us/cpp/build/overview-of-arm-abi-conventions?view=msvc-170 + // So if it is visual studio compiling, we'll assume little endian. 
+ #define C4_BYTE_ORDER _C4EL + #elif defined(__ARMEB__) || defined(__BIG_ENDIAN__) || defined(__AARCH64EB__) \ + || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) + #define C4_BYTE_ORDER _C4EB + #elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_PDP_ENDIAN__) + #define C4_BYTE_ORDER _C4EM + #else + #error "unknown endianness" + #endif + +#elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64) + #define C4_CPU_IA64 + #define C4_WORDSIZE 8 + #define C4_BYTE_ORDER _C4EM + // itanium is bi-endian - check byte order below + +#elif defined(__ppc__) || defined(__ppc) || defined(__powerpc__) \ + || defined(_ARCH_COM) || defined(_ARCH_PWR) || defined(_ARCH_PPC) \ + || defined(_M_MPPC) || defined(_M_PPC) + #if defined(__ppc64__) || defined(__powerpc64__) || defined(__64BIT__) + #define C4_CPU_PPC64 + #define C4_WORDSIZE 8 + #else + #define C4_CPU_PPC + #define C4_WORDSIZE 4 + #endif + #define C4_BYTE_ORDER _C4EM + // ppc is bi-endian - check byte order below + +#elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH_) +# define C4_CPU_S390_X +# define C4_WORDSIZE 8 +# define C4_BYTE_ORDER _C4EB + +#elif defined(__riscv) + #if __riscv_xlen == 64 + #define C4_CPU_RISCV64 + #define C4_WORDSIZE 8 + #else + #define C4_CPU_RISCV32 + #define C4_WORDSIZE 4 + #endif + #define C4_BYTE_ORDER _C4EL + +#elif defined(__EMSCRIPTEN__) +# define C4_BYTE_ORDER _C4EL +# define C4_WORDSIZE 4 + +#elif defined(SWIG) + #error "please define CPU architecture macros when compiling with swig" + +#else + #error "unknown CPU architecture" +#endif + +#define C4_LITTLE_ENDIAN (C4_BYTE_ORDER == _C4EL) +#define C4_BIG_ENDIAN (C4_BYTE_ORDER == _C4EB) +#define C4_MIXED_ENDIAN (C4_BYTE_ORDER == _C4EM) + +#endif /* _C4_CPU_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/cpu.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/compiler.hpp +// https://github.com/biojppm/c4core/src/c4/compiler.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_COMPILER_HPP_ +#define _C4_COMPILER_HPP_ + +/** @file compiler.hpp Provides compiler information macros + * @ingroup basic_headers */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/platform.hpp +//#include "c4/platform.hpp" +#if !defined(C4_PLATFORM_HPP_) && !defined(_C4_PLATFORM_HPP_) +#error "amalgamate: file c4/platform.hpp must have been included at this point" +#endif /* C4_PLATFORM_HPP_ */ + + +// Compilers: +// C4_MSVC +// Visual Studio 2022: MSVC++ 17, 1930 +// Visual Studio 2019: MSVC++ 16, 1920 +// Visual Studio 2017: MSVC++ 15 +// Visual Studio 2015: MSVC++ 14 +// Visual Studio 2013: MSVC++ 13 +// Visual Studio 2013: MSVC++ 12 +// Visual Studio 2012: MSVC++ 11 +// Visual Studio 2010: MSVC++ 10 +// Visual Studio 2008: MSVC++ 09 +// Visual Studio 2005: MSVC++ 08 +// C4_CLANG +// C4_GCC +// C4_ICC (intel compiler) +/** @see http://sourceforge.net/p/predef/wiki/Compilers/ for a list of compiler identifier macros */ +/** @see https://msdn.microsoft.com/en-us/library/b0084kay.aspx for VS2013 predefined macros */ + +#if defined(_MSC_VER)// && (defined(C4_WIN) || defined(C4_XBOX) || defined(C4_UE4)) +# define C4_MSVC +# define C4_MSVC_VERSION_2022 17 +# define C4_MSVC_VERSION_2019 16 +# define C4_MSVC_VERSION_2017 15 +# 
define C4_MSVC_VERSION_2015 14
+# define C4_MSVC_VERSION_2013 12
+# define C4_MSVC_VERSION_2012 11
+# if _MSC_VER >= 1930
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2022 // visual studio 2022
+# define C4_MSVC_2022
+# elif _MSC_VER >= 1920
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2019 // visual studio 2019
+# define C4_MSVC_2019
+# elif _MSC_VER >= 1910
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2017 // visual studio 2017
+# define C4_MSVC_2017
+# elif _MSC_VER == 1900
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2015 // visual studio 2015
+# define C4_MSVC_2015
+# elif _MSC_VER == 1800
+# error "MSVC version not supported"
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2013 // visual studio 2013
+# define C4_MSVC_2013
+# elif _MSC_VER == 1700
+# error "MSVC version not supported"
+# define C4_MSVC_VERSION C4_MSVC_VERSION_2012 // visual studio 2012
+# define C4_MSVC_2012
+# elif _MSC_VER == 1600
+# error "MSVC version not supported"
+# define C4_MSVC_VERSION 10 // visual studio 2010
+# define C4_MSVC_2010
+# elif _MSC_VER == 1500
+# error "MSVC version not supported"
+# define C4_MSVC_VERSION 09 // visual studio 2008
+# define C4_MSVC_2008
+# elif _MSC_VER == 1400
+# error "MSVC version not supported"
+# define C4_MSVC_VERSION 08 // visual studio 2005
+# define C4_MSVC_2005
+# else
+# error "MSVC version not supported"
+# endif // _MSC_VER
+#else
+# define C4_MSVC_VERSION 0 // visual studio not present
+# define C4_GCC_LIKE
+# ifdef __INTEL_COMPILER // check ICC before checking GCC, as ICC defines __GNUC__ too
+# define C4_ICC
+# define C4_ICC_VERSION __INTEL_COMPILER
+# elif defined(__APPLE_CC__)
+# define C4_XCODE
+# if defined(__clang__)
+# define C4_CLANG
+# ifndef __apple_build_version__
+# define C4_CLANG_VERSION C4_VERSION_ENCODED(__clang_major__, __clang_minor__, __clang_patchlevel__)
+# else
+# define C4_CLANG_VERSION __apple_build_version__
+# endif
+# else
+# define C4_XCODE_VERSION __APPLE_CC__
+# endif
+# elif defined(__clang__)
+# define C4_CLANG
+# ifndef __apple_build_version__
+# define C4_CLANG_VERSION C4_VERSION_ENCODED(__clang_major__, __clang_minor__, __clang_patchlevel__)
+# else
+# define C4_CLANG_VERSION __apple_build_version__
+# endif
+# elif defined(__GNUC__)
+# define C4_GCC
+# if defined(__GNUC_PATCHLEVEL__)
+# define C4_GCC_VERSION C4_VERSION_ENCODED(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)
+# else
+# define C4_GCC_VERSION C4_VERSION_ENCODED(__GNUC__, __GNUC_MINOR__, 0)
+# endif
+# if __GNUC__ < 5
+# if __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+// provided by cmake sub-project
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/gcc-4.8.hpp
+//# include "c4/gcc-4.8.hpp"
+#if !defined(C4_GCC_4_8_HPP_) && !defined(_C4_GCC_4_8_HPP_)
+#error "amalgamate: file c4/gcc-4.8.hpp must have been included at this point"
+#endif /* C4_GCC_4_8_HPP_ */
+
+# else
+// we do not support GCC < 4.8:
+// * misses std::is_trivially_copyable
+// * misses std::align
+// * -Wshadow has false positives when a local function parameter has the same name as a method
+# error "GCC < 4.8 is not supported"
+# endif
+# endif
+# endif
+#endif // defined(C4_WIN) && defined(_MSC_VER)
+
+#endif /* _C4_COMPILER_HPP_ */
+
+
+// (end https://github.com/biojppm/c4core/src/c4/compiler.hpp)
+
+// these includes are needed to work around conditional
+// includes in the gcc4.8 shim
+#include
+#include
+#include
+
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// cmake/compat/c4/gcc-4.8.hpp +// https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_COMPAT_GCC_4_8_HPP_ +#define _C4_COMPAT_GCC_4_8_HPP_ + +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 8 +/* STL polyfills for old GNU compilers */ + +_Pragma("GCC diagnostic ignored \"-Wshadow\"") +_Pragma("GCC diagnostic ignored \"-Wmissing-field-initializers\"") + +#if __cplusplus +//included above: +//#include +//included above: +//#include + +namespace std { + +template +struct is_trivially_copyable : public integral_constant::value && __has_trivial_destructor(_Tp) && + (__has_trivial_constructor(_Tp) || __has_trivial_copy(_Tp) || __has_trivial_assign(_Tp))> +{ }; + +template +using is_trivially_copy_constructible = has_trivial_copy_constructor<_Tp>; + +template +using is_trivially_default_constructible = has_trivial_default_constructor<_Tp>; + +template +using is_trivially_copy_assignable = has_trivial_copy_assign<_Tp>; + +/* not supported */ +template +struct is_trivially_move_constructible : false_type +{ }; + +/* not supported */ +template +struct is_trivially_move_assignable : false_type +{ }; + +inline void *align(size_t __align, size_t __size, void*& __ptr, size_t& __space) noexcept +{ + if (__space < __size) + return nullptr; + const auto __intptr = reinterpret_cast(__ptr); + const auto __aligned = (__intptr - 1u + __align) & -__align; + const auto __diff = __aligned - __intptr; + if (__diff > (__space - __size)) + return nullptr; + else + { + __space -= __diff; + return __ptr = reinterpret_cast(__aligned); + } +} +typedef long double max_align_t ; + +} +#else // __cplusplus + +//included above: +//#include +// see https://sourceware.org/bugzilla/show_bug.cgi?id=25399 (ubuntu gcc-4.8) +#define memset(s, c, count) __builtin_memset(s, c, count) + +#endif // __cplusplus + +#endif // __GNUC__ == 4 && __GNUC_MINOR__ >= 8 + +#endif // _C4_COMPAT_GCC_4_8_HPP_ + + +// (end https://github.com/biojppm/c4core/cmake/compat/c4/gcc-4.8.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/language.hpp +// https://github.com/biojppm/c4core/src/c4/language.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_LANGUAGE_HPP_ +#define _C4_LANGUAGE_HPP_ + +/** @file language.hpp Provides language standard information macros and + * compiler agnostic utility macros: namespace facilities, function attributes, + * variable attributes, etc. + * @ingroup basic_headers */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/preprocessor.hpp +//#include "c4/preprocessor.hpp" +#if !defined(C4_PREPROCESSOR_HPP_) && !defined(_C4_PREPROCESSOR_HPP_) +#error "amalgamate: file c4/preprocessor.hpp must have been included at this point" +#endif /* C4_PREPROCESSOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/compiler.hpp +//#include "c4/compiler.hpp" +#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_) +#error "amalgamate: file c4/compiler.hpp must have been included at this point" +#endif /* C4_COMPILER_HPP_ */ + + +/* Detect C++ standard. 
+ * @see http://stackoverflow.com/a/7132549/5875572 */ +#ifndef C4_CPP +# ifdef _MSC_VER +# if _MSC_VER >= 1910 // >VS2015: VS2017, VS2019 +# if (!defined(_MSVC_LANG)) +# error _MSVC not defined +# endif +# if _MSVC_LANG >= 201705L +# define C4_CPP 20 +# define C4_CPP20 +# elif _MSVC_LANG == 201703L +# define C4_CPP 17 +# define C4_CPP17 +# elif _MSVC_LANG >= 201402L +# define C4_CPP 14 +# define C4_CPP14 +# elif _MSVC_LANG >= 201103L +# define C4_CPP 11 +# define C4_CPP11 +# else +# error C++ lesser than C++11 not supported +# endif +# else +# if _MSC_VER == 1900 +# define C4_CPP 14 // VS2015 is c++14 https://devblogs.microsoft.com/cppblog/c111417-features-in-vs-2015-rtm/ +# define C4_CPP14 +# elif _MSC_VER == 1800 // VS2013 +# define C4_CPP 11 +# define C4_CPP11 +# else +# error C++ lesser than C++11 not supported +# endif +# endif +# elif defined(__INTEL_COMPILER) // https://software.intel.com/en-us/node/524490 +# ifdef __INTEL_CXX20_MODE__ // not sure about this +# define C4_CPP 20 +# define C4_CPP20 +# elif defined __INTEL_CXX17_MODE__ // not sure about this +# define C4_CPP 17 +# define C4_CPP17 +# elif defined __INTEL_CXX14_MODE__ // not sure about this +# define C4_CPP 14 +# define C4_CPP14 +# elif defined __INTEL_CXX11_MODE__ +# define C4_CPP 11 +# define C4_CPP11 +# else +# error C++ lesser than C++11 not supported +# endif +# else +# ifndef __cplusplus +# error __cplusplus is not defined? +# endif +# if __cplusplus == 1 +# error cannot handle __cplusplus==1 +# elif __cplusplus >= 201709L +# define C4_CPP 20 +# define C4_CPP20 +# elif __cplusplus >= 201703L +# define C4_CPP 17 +# define C4_CPP17 +# elif __cplusplus >= 201402L +# define C4_CPP 14 +# define C4_CPP14 +# elif __cplusplus >= 201103L +# define C4_CPP 11 +# define C4_CPP11 +# elif __cplusplus >= 199711L +# error C++ lesser than C++11 not supported +# endif +# endif +#else +# ifdef C4_CPP == 20 +# define C4_CPP20 +# elif C4_CPP == 17 +# define C4_CPP17 +# elif C4_CPP == 14 +# define C4_CPP14 +# elif C4_CPP == 11 +# define C4_CPP11 +# elif C4_CPP == 98 +# define C4_CPP98 +# error C++ lesser than C++11 not supported +# else +# error C4_CPP must be one of 20, 17, 14, 11, 98 +# endif +#endif + +#ifdef C4_CPP20 +# define C4_CPP17 +# define C4_CPP14 +# define C4_CPP11 +#elif defined(C4_CPP17) +# define C4_CPP14 +# define C4_CPP11 +#elif defined(C4_CPP14) +# define C4_CPP11 +#endif + +/** lifted from this answer: http://stackoverflow.com/a/20170989/5875572 */ +#ifndef _MSC_VER +# if __cplusplus < 201103 +# define C4_CONSTEXPR11 +# define C4_CONSTEXPR14 +//# define C4_NOEXCEPT +# elif __cplusplus == 201103 +# define C4_CONSTEXPR11 constexpr +# define C4_CONSTEXPR14 +//# define C4_NOEXCEPT noexcept +# else +# define C4_CONSTEXPR11 constexpr +# define C4_CONSTEXPR14 constexpr +//# define C4_NOEXCEPT noexcept +# endif +#else // _MSC_VER +# if _MSC_VER < 1900 +# define C4_CONSTEXPR11 +# define C4_CONSTEXPR14 +//# define C4_NOEXCEPT +# elif _MSC_VER < 2000 +# define C4_CONSTEXPR11 constexpr +# define C4_CONSTEXPR14 +//# define C4_NOEXCEPT noexcept +# else +# define C4_CONSTEXPR11 constexpr +# define C4_CONSTEXPR14 constexpr +//# define C4_NOEXCEPT noexcept +# endif +#endif // _MSC_VER + + +#if C4_CPP < 17 +#define C4_IF_CONSTEXPR +#define C4_INLINE_CONSTEXPR constexpr +#else +#define C4_IF_CONSTEXPR constexpr +#define C4_INLINE_CONSTEXPR inline constexpr +#endif + + +//------------------------------------------------------------ + +#define _C4_BEGIN_NAMESPACE(ns) namespace ns { +#define _C4_END_NAMESPACE(ns) } + +// MSVC cant handle 
the C4_FOR_EACH macro... need to fix this
+//#define C4_BEGIN_NAMESPACE(...) C4_FOR_EACH_SEP(_C4_BEGIN_NAMESPACE, , __VA_ARGS__)
+//#define C4_END_NAMESPACE(...) C4_FOR_EACH_SEP(_C4_END_NAMESPACE, , __VA_ARGS__)
+#define C4_BEGIN_NAMESPACE(ns) namespace ns {
+#define C4_END_NAMESPACE(ns) }
+
+#define C4_BEGIN_HIDDEN_NAMESPACE namespace /*hidden*/ {
+#define C4_END_HIDDEN_NAMESPACE } /* namespace hidden */
+
+//------------------------------------------------------------
+
+#ifndef C4_API
+# if defined(_MSC_VER)
+# if defined(C4_EXPORT)
+# define C4_API __declspec(dllexport)
+# elif defined(C4_IMPORT)
+# define C4_API __declspec(dllimport)
+# else
+# define C4_API
+# endif
+# else
+# define C4_API
+# endif
+#endif
+
+#ifndef _MSC_VER ///< @todo assuming gcc-like compiler. check it is actually so.
+/** for function attributes in GCC,
+ * @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes */
+/** for __builtin functions in GCC,
+ * @see https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html */
+# define C4_RESTRICT __restrict__
+# define C4_RESTRICT_FN __attribute__((restrict))
+# define C4_NO_INLINE __attribute__((noinline))
+# define C4_ALWAYS_INLINE inline __attribute__((always_inline))
+# define C4_CONST __attribute__((const))
+# define C4_PURE __attribute__((pure))
+/** force inlining of every callee function */
+# define C4_FLATTEN __attribute__((flatten))
+/** mark a function as hot, ie as having a visible impact in CPU time
+ * thus making it more likely to inline, etc
+ * @see http://stackoverflow.com/questions/15028990/semantics-of-gcc-hot-attribute */
+# define C4_HOT __attribute__((hot))
+/** mark a function as cold, ie as NOT having a visible impact in CPU time
+ * @see http://stackoverflow.com/questions/15028990/semantics-of-gcc-hot-attribute */
+# define C4_COLD __attribute__((cold))
+# define C4_EXPECT(x, y) __builtin_expect(x, y) ///< @see https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html
+# define C4_LIKELY(x) __builtin_expect(x, 1)
+# define C4_UNLIKELY(x) __builtin_expect(x, 0)
+# define C4_UNREACHABLE() __builtin_unreachable()
+# define C4_ATTR_FORMAT(...) //__attribute__((format (__VA_ARGS__))) ///< @see https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
+# define C4_NORETURN __attribute__((noreturn))
+#else
+# define C4_RESTRICT __restrict
+# define C4_RESTRICT_FN __declspec(restrict)
+# define C4_NO_INLINE __declspec(noinline)
+# define C4_ALWAYS_INLINE inline __forceinline
+/** these are not available in VS AFAIK */
+# define C4_CONST
+# define C4_PURE
+# define C4_FLATTEN
+# define C4_HOT /** @todo */
+# define C4_COLD /** @todo */
+# define C4_EXPECT(x, y) x /** @todo */
+# define C4_LIKELY(x) x /** @todo */
+# define C4_UNLIKELY(x) x /** @todo */
+# define C4_UNREACHABLE() /** @todo */
+# define C4_ATTR_FORMAT(...) /** */
+# define C4_NORETURN /** @todo */
+#endif
+
+#ifndef _MSC_VER
+# define C4_FUNC __FUNCTION__
+# define C4_PRETTY_FUNC __PRETTY_FUNCTION__
+#else /// @todo assuming gcc-like compiler. check it is actually so.
+# define C4_FUNC __FUNCTION__ +# define C4_PRETTY_FUNC __FUNCSIG__ +#endif + +/** prevent compiler warnings about a specific var being unused */ +#define C4_UNUSED(var) (void)var + +#if C4_CPP >= 17 +#define C4_STATIC_ASSERT(cond) static_assert(cond) +#else +#define C4_STATIC_ASSERT(cond) static_assert((cond), #cond) +#endif +#define C4_STATIC_ASSERT_MSG(cond, msg) static_assert((cond), #cond ": " msg) + +/** @def C4_DONT_OPTIMIZE idea lifted from GoogleBenchmark. + * @see https://github.com/google/benchmark/blob/master/include/benchmark/benchmark_api.h */ +namespace c4 { +namespace detail { +#ifdef __GNUC__ +# define C4_DONT_OPTIMIZE(var) c4::detail::dont_optimize(var) +template< class T > +C4_ALWAYS_INLINE void dont_optimize(T const& value) { asm volatile("" : : "g"(value) : "memory"); } +#else +# define C4_DONT_OPTIMIZE(var) c4::detail::use_char_pointer(reinterpret_cast< const char* >(&var)) +void use_char_pointer(char const volatile*); +#endif +} // namespace detail +} // namespace c4 + +/** @def C4_KEEP_EMPTY_LOOP prevent an empty loop from being optimized out. + * @see http://stackoverflow.com/a/7084193/5875572 */ +#ifndef _MSC_VER +# define C4_KEEP_EMPTY_LOOP { asm(""); } +#else +# define C4_KEEP_EMPTY_LOOP { char c; C4_DONT_OPTIMIZE(c); } +#endif + +/** @def C4_VA_LIST_REUSE_MUST_COPY + * @todo I strongly suspect that this is actually only in UNIX platforms. revisit this. */ +#ifdef __GNUC__ +# define C4_VA_LIST_REUSE_MUST_COPY +#endif + +#endif /* _C4_LANGUAGE_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/language.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/types.hpp +// https://github.com/biojppm/c4core/src/c4/types.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_TYPES_HPP_ +#define _C4_TYPES_HPP_ + +//included above: +//#include +#include +//included above: +//#include + +#if __cplusplus >= 201103L +#include // for integer_sequence and friends +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/preprocessor.hpp +//#include "c4/preprocessor.hpp" +#if !defined(C4_PREPROCESSOR_HPP_) && !defined(_C4_PREPROCESSOR_HPP_) +#error "amalgamate: file c4/preprocessor.hpp must have been included at this point" +#endif /* C4_PREPROCESSOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + + +/** @file types.hpp basic types, and utility macros and traits for types. 
+ * @ingroup basic_headers */ + +/** @defgroup types Type utilities */ + +namespace c4 { + +/** @defgroup intrinsic_types Intrinsic types + * @ingroup types + * @{ */ + +using cbyte = const char; /**< a constant byte */ +using byte = char; /**< a mutable byte */ + +using i8 = int8_t; +using i16 = int16_t; +using i32 = int32_t; +using i64 = int64_t; +using u8 = uint8_t; +using u16 = uint16_t; +using u32 = uint32_t; +using u64 = uint64_t; + +using f32 = float; +using f64 = double; + +using ssize_t = typename std::make_signed::type; + +/** @} */ + +//-------------------------------------------------- + +/** @defgroup utility_types Utility types + * @ingroup types + * @{ */ + +// some tag types + +/** a tag type for initializing the containers with variadic arguments a la + * initializer_list, minus the initializer_list overload problems. + */ +struct aggregate_t {}; +/** @see aggregate_t */ +constexpr const aggregate_t aggregate{}; + +/** a tag type for specifying the initial capacity of allocatable contiguous storage */ +struct with_capacity_t {}; +/** @see with_capacity_t */ +constexpr const with_capacity_t with_capacity{}; + +/** a tag type for disambiguating template parameter packs in variadic template overloads */ +struct varargs_t {}; +/** @see with_capacity_t */ +constexpr const varargs_t varargs{}; + + +//-------------------------------------------------- + +/** whether a value should be used in place of a const-reference in argument passing. */ +template +struct cref_uses_val +{ + enum { value = ( + std::is_scalar::value + || + ( +#if C4_CPP >= 20 + (std::is_trivially_copyable::value && std::is_standard_layout::value) +#else + std::is_pod::value +#endif + && + sizeof(T) <= sizeof(size_t))) }; +}; +/** utility macro to override the default behaviour for c4::fastcref + @see fastcref */ +#define C4_CREF_USES_VAL(T) \ +template<> \ +struct cref_uses_val \ +{ \ + enum { value = true }; \ +}; + +/** Whether to use pass-by-value or pass-by-const-reference in a function argument + * or return type. */ +template +using fastcref = typename std::conditional::value, T, T const&>::type; + +//-------------------------------------------------- + +/** Just what its name says. Useful sometimes as a default empty policy class. */ +struct EmptyStruct +{ + template EmptyStruct(T && ...){} +}; + +/** Just what its name says. Useful sometimes as a default policy class to + * be inherited from. */ +struct EmptyStructVirtual +{ + virtual ~EmptyStructVirtual() = default; + template EmptyStructVirtual(T && ...){} +}; + + +/** */ +template +struct inheritfrom : public T {}; + +//-------------------------------------------------- +// Utilities to make a class obey size restrictions (eg, min size or size multiple of). +// DirectX usually makes this restriction with uniform buffers. +// This is also useful for padding to prevent false-sharing. + +/** how many bytes must be added to size such that the result is at least minsize? */ +C4_ALWAYS_INLINE constexpr size_t min_remainder(size_t size, size_t minsize) noexcept +{ + return size < minsize ? minsize-size : 0; +} + +/** how many bytes must be added to size such that the result is a multiple of multipleof? */ +C4_ALWAYS_INLINE constexpr size_t mult_remainder(size_t size, size_t multipleof) noexcept +{ + return (((size % multipleof) != 0) ? (multipleof-(size % multipleof)) : 0); +} + +/* force the following class to be tightly packed. */ +#pragma pack(push, 1) +/** pad a class with more bytes at the end. 
+ * @see http://stackoverflow.com/questions/21092415/force-c-structure-to-pack-tightly */ +template +struct Padded : public T +{ + using T::T; + using T::operator=; + Padded(T const& val) : T(val) {} + Padded(T && val) : T(val) {} + char ___c4padspace___[BytesToPadAtEnd]; +}; +#pragma pack(pop) +/** When the padding argument is 0, we cannot declare the char[] array. */ +template +struct Padded : public T +{ + using T::T; + using T::operator=; + Padded(T const& val) : T(val) {} + Padded(T && val) : T(val) {} +}; + +/** make T have a size which is at least Min bytes */ +template +using MinSized = Padded; + +/** make T have a size which is a multiple of Mult bytes */ +template +using MultSized = Padded; + +/** make T have a size which is simultaneously: + * -bigger or equal than Min + * -a multiple of Mult */ +template +using MinMultSized = MultSized, Mult>; + +/** make T be suitable for use as a uniform buffer. (at least with DirectX). */ +template +using UbufSized = MinMultSized; + + +//----------------------------------------------------------------------------- + +#define C4_NO_COPY_CTOR(ty) ty(ty const&) = delete +#define C4_NO_MOVE_CTOR(ty) ty(ty &&) = delete +#define C4_NO_COPY_ASSIGN(ty) ty& operator=(ty const&) = delete +#define C4_NO_MOVE_ASSIGN(ty) ty& operator=(ty &&) = delete +#define C4_DEFAULT_COPY_CTOR(ty) ty(ty const&) noexcept = default +#define C4_DEFAULT_MOVE_CTOR(ty) ty(ty &&) noexcept = default +#define C4_DEFAULT_COPY_ASSIGN(ty) ty& operator=(ty const&) noexcept = default +#define C4_DEFAULT_MOVE_ASSIGN(ty) ty& operator=(ty &&) noexcept = default + +#define C4_NO_COPY_OR_MOVE_CTOR(ty) \ + C4_NO_COPY_CTOR(ty); \ + C4_NO_MOVE_CTOR(ty) + +#define C4_NO_COPY_OR_MOVE_ASSIGN(ty) \ + C4_NO_COPY_ASSIGN(ty); \ + C4_NO_MOVE_ASSIGN(ty) + +#define C4_NO_COPY_OR_MOVE(ty) \ + C4_NO_COPY_OR_MOVE_CTOR(ty); \ + C4_NO_COPY_OR_MOVE_ASSIGN(ty) + +#define C4_DEFAULT_COPY_AND_MOVE_CTOR(ty) \ + C4_DEFAULT_COPY_CTOR(ty); \ + C4_DEFAULT_MOVE_CTOR(ty) + +#define C4_DEFAULT_COPY_AND_MOVE_ASSIGN(ty) \ + C4_DEFAULT_COPY_ASSIGN(ty); \ + C4_DEFAULT_MOVE_ASSIGN(ty) + +#define C4_DEFAULT_COPY_AND_MOVE(ty) \ + C4_DEFAULT_COPY_AND_MOVE_CTOR(ty); \ + C4_DEFAULT_COPY_AND_MOVE_ASSIGN(ty) + +/** @see https://en.cppreference.com/w/cpp/named_req/TriviallyCopyable */ +#define C4_MUST_BE_TRIVIAL_COPY(ty) \ + static_assert(std::is_trivially_copyable::value, #ty " must be trivially copyable") + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @defgroup traits_types Type traits utilities + * @ingroup types + * @{ */ + +// http://stackoverflow.com/questions/10821380/is-t-an-instance-of-a-template-in-c +template class X, typename T> struct is_instance_of_tpl : std::false_type {}; +template class X, typename... Y> struct is_instance_of_tpl> : std::true_type {}; + +//----------------------------------------------------------------------------- + +/** SFINAE. use this macro to enable a template function overload +based on a compile-time condition. 
+@code +// define an overload for a non-pod type +template::value)> +void foo() { std::cout << "pod type\n"; } + +// define an overload for a non-pod type +template::value)> +void foo() { std::cout << "nonpod type\n"; } + +struct non_pod +{ + non_pod() : name("asdfkjhasdkjh") {} + const char *name; +}; + +int main() +{ + foo(); // prints "pod type" + foo(); // prints "nonpod type" +} +@endcode */ +#define C4_REQUIRE_T(cond) typename std::enable_if::type* = nullptr + +/** enable_if for a return type + * @see C4_REQUIRE_T */ +#define C4_REQUIRE_R(cond, type_) typename std::enable_if::type + +//----------------------------------------------------------------------------- +/** define a traits class reporting whether a type provides a member typedef */ +#define C4_DEFINE_HAS_TYPEDEF(member_typedef) \ +template \ +struct has_##stype \ +{ \ +private: \ + \ + typedef char yes; \ + typedef struct { char array[2]; } no; \ + \ + template \ + static yes _test(typename C::member_typedef*); \ + \ + template \ + static no _test(...); \ + \ +public: \ + \ + enum { value = (sizeof(_test(0)) == sizeof(yes)) }; \ + \ +} + + +/** @} */ + + +//----------------------------------------------------------------------------- + + +/** @defgroup type_declarations Type declaration utilities + * @ingroup types + * @{ */ + +#define _c4_DEFINE_ARRAY_TYPES_WITHOUT_ITERATOR(T, I) \ + \ + using size_type = I; \ + using ssize_type = typename std::make_signed::type; \ + using difference_type = typename std::make_signed::type; \ + \ + using value_type = T; \ + using pointer = T*; \ + using const_pointer = T const*; \ + using reference = T&; \ + using const_reference = T const& + +#define _c4_DEFINE_TUPLE_ARRAY_TYPES_WITHOUT_ITERATOR(interior_types, I) \ + \ + using size_type = I; \ + using ssize_type = typename std::make_signed::type; \ + using difference_type = typename std::make_signed::type; \ + \ + template using value_type = typename std::tuple_element< n, std::tuple>::type; \ + template using pointer = value_type*; \ + template using const_pointer = value_type const*; \ + template using reference = value_type&; \ + template using const_reference = value_type const& + + +#define _c4_DEFINE_ARRAY_TYPES(T, I) \ + \ + _c4_DEFINE_ARRAY_TYPES_WITHOUT_ITERATOR(T, I); \ + \ + using iterator = T*; \ + using const_iterator = T const*; \ + using reverse_iterator = std::reverse_iterator; \ + using const_reverse_iterator = std::reverse_iterator + + +#define _c4_DEFINE_TUPLE_ARRAY_TYPES(interior_types, I) \ + \ + _c4_DEFINE_TUPLE_ARRAY_TYPES_WITHOUT_ITERATOR(interior_types, I); \ + \ + template using iterator = value_type*; \ + template using const_iterator = value_type const*; \ + template using reverse_iterator = std::reverse_iterator< value_type*>; \ + template using const_reverse_iterator = std::reverse_iterator< value_type const*> + + + +/** @} */ + + +//----------------------------------------------------------------------------- + + +/** @defgroup compatility_utilities Backport implementation of some Modern C++ utilities + * @ingroup types + * @{ */ + +//----------------------------------------------------------------------------- +// index_sequence and friends are available only for C++14 and later. +// A C++11 implementation is provided here. +// This implementation was copied over from clang. 
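+//
+// A minimal usage sketch of the backport below (print_each is a hypothetical
+// helper, not part of the c4 API; assumes <tuple> and <iostream>): it expands
+// a parameter pack by index on C++11 just as with std::index_sequence.
+//
+//     template<class Tuple, std::size_t... I>
+//     void print_each(Tuple const& t, c4::index_sequence<I...>)
+//     {
+//         // one expansion per tuple element, evaluated left to right
+//         int dummy[] = {0, ((void)(std::cout << std::get<I>(t) << '\n'), 0)...};
+//         (void)dummy;
+//     }
+//     // call site:
+//     // print_each(t, c4::make_index_sequence<std::tuple_size<Tuple>::value>{});
+//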
+// see http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 + +#if __cplusplus > 201103L + +using std::integer_sequence; +using std::index_sequence; +using std::make_integer_sequence; +using std::make_index_sequence; +using std::index_sequence_for; + +#else + +/** C++11 implementation of integer sequence + * @see https://en.cppreference.com/w/cpp/utility/integer_sequence + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ +template +struct integer_sequence +{ + static_assert(std::is_integral<_Tp>::value, + "std::integer_sequence can only be instantiated with an integral type" ); + using value_type = _Tp; + static constexpr size_t size() noexcept { return sizeof...(_Ip); } +}; + +/** C++11 implementation of index sequence + * @see https://en.cppreference.com/w/cpp/utility/integer_sequence + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ +template +using index_sequence = integer_sequence; + +/** @cond DONT_DOCUMENT_THIS */ +namespace __detail { + +template +struct __repeat; + +template +struct __repeat, _Extra...> +{ + using type = integer_sequence<_Tp, + _Np..., + sizeof...(_Np) + _Np..., + 2 * sizeof...(_Np) + _Np..., + 3 * sizeof...(_Np) + _Np..., + 4 * sizeof...(_Np) + _Np..., + 5 * sizeof...(_Np) + _Np..., + 6 * sizeof...(_Np) + _Np..., + 7 * sizeof...(_Np) + _Np..., + _Extra...>; +}; + +template struct __parity; +template struct __make : __parity<_Np % 8>::template __pmake<_Np> {}; + +template<> struct __make<0> { using type = integer_sequence; }; +template<> struct __make<1> { using type = integer_sequence; }; +template<> struct __make<2> { using type = integer_sequence; }; +template<> struct __make<3> { using type = integer_sequence; }; +template<> struct __make<4> { using type = integer_sequence; }; +template<> struct __make<5> { using type = integer_sequence; }; +template<> struct __make<6> { using type = integer_sequence; }; +template<> struct __make<7> { using type = integer_sequence; }; + +template<> struct __parity<0> { template struct __pmake : __repeat::type> {}; }; +template<> struct __parity<1> { template struct __pmake : __repeat::type, _Np - 1> {}; }; +template<> struct __parity<2> { template struct __pmake : __repeat::type, _Np - 2, _Np - 1> {}; }; +template<> struct __parity<3> { template struct __pmake : __repeat::type, _Np - 3, _Np - 2, _Np - 1> {}; }; +template<> struct __parity<4> { template struct __pmake : __repeat::type, _Np - 4, _Np - 3, _Np - 2, _Np - 1> {}; }; +template<> struct __parity<5> { template struct __pmake : __repeat::type, _Np - 5, _Np - 4, _Np - 3, _Np - 2, _Np - 1> {}; }; +template<> struct __parity<6> { template struct __pmake : __repeat::type, _Np - 6, _Np - 5, _Np - 4, _Np - 3, _Np - 2, _Np - 1> {}; }; +template<> struct __parity<7> { template struct __pmake : __repeat::type, _Np - 7, _Np - 6, _Np - 5, _Np - 4, _Np - 3, _Np - 2, _Np - 1> {}; }; + +template +struct __convert +{ + template struct __result; + template<_Tp ..._Np> struct __result> + { + using type = integer_sequence<_Up, _Np...>; + }; +}; + +template +struct __convert<_Tp, _Tp> +{ + template struct __result + { + using type = _Up; + }; +}; + +template +using __make_integer_sequence_unchecked = typename __detail::__convert::template __result::type>::type; + +template +struct __make_integer_sequence +{ + static_assert(std::is_integral<_Tp>::value, + "std::make_integer_sequence can only 
be instantiated with an integral type" ); + static_assert(0 <= _Ep, "std::make_integer_sequence input shall not be negative"); + typedef __make_integer_sequence_unchecked<_Tp, _Ep> type; +}; + +} // namespace __detail +/** @endcond */ + + +/** C++11 implementation of index sequence + * @see https://en.cppreference.com/w/cpp/utility/integer_sequence + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ +template +using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; + +/** C++11 implementation of index sequence + * @see https://en.cppreference.com/w/cpp/utility/integer_sequence + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ +template +using make_index_sequence = make_integer_sequence; + +/** C++11 implementation of index sequence + * @see https://en.cppreference.com/w/cpp/utility/integer_sequence + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ +template +using index_sequence_for = make_index_sequence; +#endif + +/** @} */ + + +} // namespace c4 + +#endif /* _C4_TYPES_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/types.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/config.hpp +// https://github.com/biojppm/c4core/src/c4/config.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_CONFIG_HPP_ +#define _C4_CONFIG_HPP_ + +/** @defgroup basic_headers Basic headers + * @brief Headers providing basic macros, platform+cpu+compiler information, + * C++ facilities and basic typedefs. */ + +/** @file config.hpp Contains configuration defines and includes the basic_headers. 
+ * @ingroup basic_headers */ + +//#define C4_DEBUG + +#define C4_ERROR_SHOWS_FILELINE +//#define C4_ERROR_SHOWS_FUNC +//#define C4_ERROR_THROWS_EXCEPTION +//#define C4_NO_ALLOC_DEFAULTS +//#define C4_REDEFINE_CPPNEW + +#ifndef C4_SIZE_TYPE +# define C4_SIZE_TYPE size_t +#endif + +#ifndef C4_STR_SIZE_TYPE +# define C4_STR_SIZE_TYPE C4_SIZE_TYPE +#endif + +#ifndef C4_TIME_TYPE +# define C4_TIME_TYPE double +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/export.hpp +//#include "c4/export.hpp" +#if !defined(C4_EXPORT_HPP_) && !defined(_C4_EXPORT_HPP_) +#error "amalgamate: file c4/export.hpp must have been included at this point" +#endif /* C4_EXPORT_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/preprocessor.hpp +//#include "c4/preprocessor.hpp" +#if !defined(C4_PREPROCESSOR_HPP_) && !defined(_C4_PREPROCESSOR_HPP_) +#error "amalgamate: file c4/preprocessor.hpp must have been included at this point" +#endif /* C4_PREPROCESSOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/platform.hpp +//#include "c4/platform.hpp" +#if !defined(C4_PLATFORM_HPP_) && !defined(_C4_PLATFORM_HPP_) +#error "amalgamate: file c4/platform.hpp must have been included at this point" +#endif /* C4_PLATFORM_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/cpu.hpp +//#include "c4/cpu.hpp" +#if !defined(C4_CPU_HPP_) && !defined(_C4_CPU_HPP_) +#error "amalgamate: file c4/cpu.hpp must have been included at this point" +#endif /* C4_CPU_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/compiler.hpp +//#include "c4/compiler.hpp" +#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_) +#error "amalgamate: file c4/compiler.hpp must have been included at this point" +#endif /* C4_COMPILER_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/types.hpp +//#include "c4/types.hpp" +#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) +#error "amalgamate: file c4/types.hpp must have been included at this point" +#endif /* C4_TYPES_HPP_ */ + + +#endif // _C4_CONFIG_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/config.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/ext/debugbreak/debugbreak.h +// https://github.com/biojppm/c4core/src/c4/ext/debugbreak/debugbreak.h +//-------------------------------------------------------------------------------- +//******************************************************************************** + +/* Copyright (c) 2011-2021, Scott Tsai + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef DEBUG_BREAK_H +#define DEBUG_BREAK_H + +#ifdef _MSC_VER + +#define debug_break __debugbreak + +#else + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEBUG_BREAK_USE_TRAP_INSTRUCTION 1 +#define DEBUG_BREAK_USE_BULTIN_TRAP 2 +#define DEBUG_BREAK_USE_SIGTRAP 3 + +#if defined(__i386__) || defined(__x86_64__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__inline__ static void trap_instruction(void) +{ + __asm__ volatile("int $0x03"); +} +#elif defined(__thumb__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +/* FIXME: handle __THUMB_INTERWORK__ */ +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'arm-linux-tdep.c' in GDB source. + * Both instruction sequences below work. */ +#if 1 + /* 'eabi_linux_thumb_le_breakpoint' */ + __asm__ volatile(".inst 0xde01"); +#else + /* 'eabi_linux_thumb2_le_breakpoint' */ + __asm__ volatile(".inst.w 0xf7f0a000"); +#endif + + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' would keep getting stuck on the same instruction. + * + * Workaround: use the new GDB commands 'debugbreak-step' and + * 'debugbreak-continue' that become available + * after you source the script from GDB: + * + * $ gdb -x debugbreak-gdb.py <... 
USUAL ARGUMENTS ...> + * + * 'debugbreak-step' would jump over the breakpoint instruction with + * roughly equivalent of: + * (gdb) set $instruction_len = 2 + * (gdb) tbreak *($pc + $instruction_len) + * (gdb) jump *($pc + $instruction_len) + */ +} +#elif defined(__arm__) && !defined(__thumb__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'arm-linux-tdep.c' in GDB source, + * 'eabi_linux_arm_le_breakpoint' */ + __asm__ volatile(".inst 0xe7f001f0"); + /* Known problem: + * Same problem and workaround as Thumb mode */ +} +#elif defined(__aarch64__) && defined(__APPLE__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_BULTIN_DEBUGTRAP +#elif defined(__aarch64__) + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'aarch64-tdep.c' in GDB source, + * 'aarch64_default_breakpoint' */ + __asm__ volatile(".inst 0xd4200000"); +} +#elif defined(__powerpc__) + /* PPC 32 or 64-bit, big or little endian */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'rs6000-tdep.c' in GDB source, + * 'rs6000_breakpoint' */ + __asm__ volatile(".4byte 0x7d821008"); + + /* Known problem: + * After a breakpoint hit, can't 'stepi', 'step', or 'continue' in GDB. + * 'step' stuck on the same instruction ("twge r2,r2"). + * + * The workaround is the same as ARM Thumb mode: use debugbreak-gdb.py + * or manually jump over the instruction. */ +} +#elif defined(__riscv) + /* RISC-V 32 or 64-bit, whether the "C" extension + * for compressed, 16-bit instructions are supported or not */ + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void trap_instruction(void) +{ + /* See 'riscv-tdep.c' in GDB source, + * 'riscv_sw_breakpoint_from_kind' */ + __asm__ volatile(".4byte 0x00100073"); +} +#else + #define DEBUG_BREAK_IMPL DEBUG_BREAK_USE_SIGTRAP +#endif + + +#ifndef DEBUG_BREAK_IMPL +#error "debugbreak.h is not supported on this target" +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_TRAP_INSTRUCTION +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + trap_instruction(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_DEBUGTRAP +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + __builtin_debugtrap(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_BULTIN_TRAP +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + __builtin_trap(); +} +#elif DEBUG_BREAK_IMPL == DEBUG_BREAK_USE_SIGTRAP +#include +__attribute__((always_inline)) +__inline__ static void debug_break(void) +{ + raise(SIGTRAP); +} +#else +#error "invalid DEBUG_BREAK_IMPL value" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ifdef _MSC_VER */ + +#endif /* ifndef DEBUG_BREAK_H */ + + +// (end https://github.com/biojppm/c4core/src/c4/ext/debugbreak/debugbreak.h) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/error.hpp +// https://github.com/biojppm/c4core/src/c4/error.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_ERROR_HPP_ +#define _C4_ERROR_HPP_ + +/** @file 
error.hpp Facilities for error reporting and runtime assertions. */ + +/** @defgroup error_checking Error checking */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + + +#ifdef _DOXYGEN_ + /** if this is defined and exceptions are enabled, then calls to C4_ERROR() + * will throw an exception + * @ingroup error_checking */ +# define C4_EXCEPTIONS_ENABLED + /** if this is defined and exceptions are enabled, then calls to C4_ERROR() + * will throw an exception + * @see C4_EXCEPTIONS_ENABLED + * @ingroup error_checking */ +# define C4_ERROR_THROWS_EXCEPTION + /** evaluates to noexcept when C4_ERROR might be called and + * exceptions are disabled. Otherwise, defaults to nothing. + * @ingroup error_checking */ +# define C4_NOEXCEPT +#endif // _DOXYGEN_ + +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) +# define C4_NOEXCEPT +#else +# define C4_NOEXCEPT noexcept +#endif + + +namespace c4 { +namespace detail { +struct fail_type__ {}; +} // detail +} // c4 +#define C4_STATIC_ERROR(dummy_type, errmsg) \ + static_assert(std::is_same::value, errmsg) + + +//----------------------------------------------------------------------------- + +#define C4_ASSERT_SAME_TYPE(ty1, ty2) \ + C4_STATIC_ASSERT(std::is_same::value) + +#define C4_ASSERT_DIFF_TYPE(ty1, ty2) \ + C4_STATIC_ASSERT( ! std::is_same::value) + + +//----------------------------------------------------------------------------- + +#ifdef _DOXYGEN_ +/** utility macro that triggers a breakpoint when + * the debugger is attached and NDEBUG is not defined. + * @ingroup error_checking */ +# define C4_DEBUG_BREAK() +#endif // _DOXYGEN_ + + +#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) +# define C4_DEBUG_BREAK() +#else +# ifdef __clang__ +# pragma clang diagnostic push +# if !defined(__APPLE_CC__) +# if __clang_major__ >= 10 +# pragma clang diagnostic ignored "-Wgnu-inline-cpp-without-extern" // debugbreak/debugbreak.h:50:16: error: 'gnu_inline' attribute without 'extern' in C++ treated as externally available, this changed in Clang 10 [-Werror,-Wgnu-inline-cpp-without-extern] +# endif +# else +# if __clang_major__ >= 13 +# pragma clang diagnostic ignored "-Wgnu-inline-cpp-without-extern" // debugbreak/debugbreak.h:50:16: error: 'gnu_inline' attribute without 'extern' in C++ treated as externally available, this changed in Clang 10 [-Werror,-Wgnu-inline-cpp-without-extern] +# endif +# endif +# elif defined(__GNUC__) +# endif +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/ext/debugbreak/debugbreak.h +//# include +#if !defined(DEBUG_BREAK_H) && !defined(_DEBUG_BREAK_H) +#error "amalgamate: file c4/ext/debugbreak/debugbreak.h must have been included at this point" +#endif /* DEBUG_BREAK_H */ + +# define C4_DEBUG_BREAK() if(c4::is_debugger_attached()) { ::debug_break(); } +# ifdef __clang__ +# pragma clang diagnostic pop +# elif defined(__GNUC__) +# endif +#endif + +namespace c4 { +C4CORE_EXPORT bool is_debugger_attached(); +} // namespace c4 + + +//----------------------------------------------------------------------------- + +#ifdef __clang__ + /* NOTE: using , ## __VA_ARGS__ to deal with zero-args calls to + * variadic macros is not portable, but works in clang, gcc, msvc, icc. + * clang requires switching off compiler warnings for pedantic mode. 
+ * @see http://stackoverflow.com/questions/32047685/variadic-macro-without-arguments */ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" // warning: token pasting of ',' and __VA_ARGS__ is a GNU extension +#elif defined(__GNUC__) + /* GCC also issues a warning for zero-args calls to variadic macros. + * This warning is switched on with -pedantic and apparently there is no + * easy way to turn it off as with clang. But marking this as a system + * header works. + * @see https://gcc.gnu.org/onlinedocs/cpp/System-Headers.html + * @see http://stackoverflow.com/questions/35587137/ */ +# pragma GCC system_header +#endif + + +//----------------------------------------------------------------------------- + +namespace c4 { + +typedef enum : uint32_t { + /** when an error happens and the debugger is attached, call C4_DEBUG_BREAK(). + * Without effect otherwise. */ + ON_ERROR_DEBUGBREAK = 0x01 << 0, + /** when an error happens log a message. */ + ON_ERROR_LOG = 0x01 << 1, + /** when an error happens invoke a callback if it was set with + * set_error_callback(). */ + ON_ERROR_CALLBACK = 0x01 << 2, + /** when an error happens call std::terminate(). */ + ON_ERROR_ABORT = 0x01 << 3, + /** when an error happens and exceptions are enabled throw an exception. + * Without effect otherwise. */ + ON_ERROR_THROW = 0x01 << 4, + /** the default flags. */ + ON_ERROR_DEFAULTS = ON_ERROR_DEBUGBREAK|ON_ERROR_LOG|ON_ERROR_CALLBACK|ON_ERROR_ABORT +} ErrorFlags_e; +using error_flags = uint32_t; +C4CORE_EXPORT void set_error_flags(error_flags f); +C4CORE_EXPORT error_flags get_error_flags(); + + +using error_callback_type = void (*)(const char* msg, size_t msg_size); +C4CORE_EXPORT void set_error_callback(error_callback_type cb); +C4CORE_EXPORT error_callback_type get_error_callback(); + + +//----------------------------------------------------------------------------- +/** RAII class controling the error settings inside a scope. */ +struct ScopedErrorSettings +{ + error_flags m_flags; + error_callback_type m_callback; + + explicit ScopedErrorSettings(error_callback_type cb) + : m_flags(get_error_flags()), + m_callback(get_error_callback()) + { + set_error_callback(cb); + } + explicit ScopedErrorSettings(error_flags flags) + : m_flags(get_error_flags()), + m_callback(get_error_callback()) + { + set_error_flags(flags); + } + explicit ScopedErrorSettings(error_flags flags, error_callback_type cb) + : m_flags(get_error_flags()), + m_callback(get_error_callback()) + { + set_error_flags(flags); + set_error_callback(cb); + } + ~ScopedErrorSettings() + { + set_error_flags(m_flags); + set_error_callback(m_callback); + } +}; + + +//----------------------------------------------------------------------------- + +/** source location */ +struct srcloc; + +C4CORE_EXPORT void handle_error(srcloc s, const char *fmt, ...); +C4CORE_EXPORT void handle_warning(srcloc s, const char *fmt, ...); + + +# define C4_ERROR(msg, ...) \ + do { \ + if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ + { \ + C4_DEBUG_BREAK() \ + } \ + c4::handle_error(C4_SRCLOC(), msg, ## __VA_ARGS__); \ + } while(0) + + +# define C4_WARNING(msg, ...) 
\ + c4::handle_warning(C4_SRCLOC(), msg, ## __VA_ARGS__) + + +#if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) + +struct srcloc +{ + const char *file = ""; + const char *func = ""; + int line = 0; +}; +#define C4_SRCLOC() c4::srcloc{__FILE__, C4_PRETTY_FUNC, __LINE__} + +#elif defined(C4_ERROR_SHOWS_FILELINE) + +struct srcloc +{ + const char *file; + int line; +}; +#define C4_SRCLOC() c4::srcloc{__FILE__, __LINE__} + +#elif ! defined(C4_ERROR_SHOWS_FUNC) + +struct srcloc +{ +}; +#define C4_SRCLOC() c4::srcloc() + +#else +# error not implemented +#endif + + +//----------------------------------------------------------------------------- +// assertions + +// Doxygen needs this so that only one definition counts +#ifdef _DOXYGEN_ + /** Explicitly enables assertions, independently of NDEBUG status. + * This is meant to allow enabling assertions even when NDEBUG is defined. + * Defaults to undefined. + * @ingroup error_checking */ +# define C4_USE_ASSERT + /** assert that a condition is true; this is turned off when NDEBUG + * is defined and C4_USE_ASSERT is not true. + * @ingroup error_checking */ +# define C4_ASSERT + /** same as C4_ASSERT(), additionally prints a printf-formatted message + * @ingroup error_checking */ +# define C4_ASSERT_MSG + /** evaluates to C4_NOEXCEPT when C4_XASSERT is disabled; otherwise, defaults + * to noexcept + * @ingroup error_checking */ +# define C4_NOEXCEPT_A +#endif // _DOXYGEN_ + +#ifndef C4_USE_ASSERT +# ifdef NDEBUG +# define C4_USE_ASSERT 0 +# else +# define C4_USE_ASSERT 1 +# endif +#endif + +#if C4_USE_ASSERT +# define C4_ASSERT(cond) C4_CHECK(cond) +# define C4_ASSERT_MSG(cond, /*fmt, */...) C4_CHECK_MSG(cond, ## __VA_ARGS__) +# define C4_ASSERT_IF(predicate, cond) if(predicate) { C4_ASSERT(cond); } +# define C4_NOEXCEPT_A C4_NOEXCEPT +#else +# define C4_ASSERT(cond) +# define C4_ASSERT_MSG(cond, /*fmt, */...) +# define C4_ASSERT_IF(predicate, cond) +# define C4_NOEXCEPT_A noexcept +#endif + + +//----------------------------------------------------------------------------- +// extreme assertions + +// Doxygen needs this so that only one definition counts +#ifdef _DOXYGEN_ + /** Explicitly enables extreme assertions; this is meant to allow enabling + * assertions even when NDEBUG is defined. Defaults to undefined. + * @ingroup error_checking */ +# define C4_USE_XASSERT + /** extreme assertion: can be switched off independently of + * the regular assertion; use for example for bounds checking in hot code. + * Turned on only when C4_USE_XASSERT is defined + * @ingroup error_checking */ +# define C4_XASSERT + /** same as C4_XASSERT(), and additionally prints a printf-formatted message + * @ingroup error_checking */ +# define C4_XASSERT_MSG + /** evaluates to C4_NOEXCEPT when C4_XASSERT is disabled; otherwise, defaults to noexcept + * @ingroup error_checking */ +# define C4_NOEXCEPT_X +#endif // _DOXYGEN_ + +#ifndef C4_USE_XASSERT +# define C4_USE_XASSERT C4_USE_ASSERT +#endif + +#if C4_USE_XASSERT +# define C4_XASSERT(cond) C4_CHECK(cond) +# define C4_XASSERT_MSG(cond, /*fmt, */...) C4_CHECK_MSG(cond, ## __VA_ARGS__) +# define C4_XASSERT_IF(predicate, cond) if(predicate) { C4_XASSERT(cond); } +# define C4_NOEXCEPT_X C4_NOEXCEPT +#else +# define C4_XASSERT(cond) +# define C4_XASSERT_MSG(cond, /*fmt, */...) 
+# define C4_XASSERT_IF(predicate, cond) +# define C4_NOEXCEPT_X noexcept +#endif + + +//----------------------------------------------------------------------------- +// checks: never switched-off + +/** Check that a condition is true, or raise an error when not + * true. Unlike C4_ASSERT(), this check is not disabled in non-debug + * builds. + * @see C4_ASSERT + * @ingroup error_checking + * + * @todo add constexpr-compatible compile-time assert: + * https://akrzemi1.wordpress.com/2017/05/18/asserts-in-constexpr-functions/ + */ +#define C4_CHECK(cond) \ + do { \ + if(C4_UNLIKELY(!(cond))) \ + { \ + C4_ERROR("check failed: %s", #cond); \ + } \ + } while(0) + + +/** like C4_CHECK(), and additionally log a printf-style message. + * @see C4_CHECK + * @ingroup error_checking */ +#define C4_CHECK_MSG(cond, fmt, ...) \ + do { \ + if(C4_UNLIKELY(!(cond))) \ + { \ + C4_ERROR("check failed: " #cond "\n" fmt, ## __VA_ARGS__); \ + } \ + } while(0) + + +//----------------------------------------------------------------------------- +// Common error conditions + +#define C4_NOT_IMPLEMENTED() C4_ERROR("NOT IMPLEMENTED") +#define C4_NOT_IMPLEMENTED_MSG(/*msg, */...) C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__) +#define C4_NOT_IMPLEMENTED_IF(condition) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED"); } } while(0) +#define C4_NOT_IMPLEMENTED_IF_MSG(condition, /*msg, */...) do { if(C4_UNLIKELY(condition)) { C4_ERROR("NOT IMPLEMENTED: " ## __VA_ARGS__); } } while(0) + +#define C4_NEVER_REACH() do { C4_ERROR("never reach this point"); C4_UNREACHABLE(); } while(0) +#define C4_NEVER_REACH_MSG(/*msg, */...) do { C4_ERROR("never reach this point: " ## __VA_ARGS__); C4_UNREACHABLE(); } while(0) + + + +//----------------------------------------------------------------------------- +// helpers for warning suppression +// idea adapted from https://github.com/onqtam/doctest/ + + +#ifdef C4_MSVC +#define C4_SUPPRESS_WARNING_MSVC_PUSH __pragma(warning(push)) +#define C4_SUPPRESS_WARNING_MSVC(w) __pragma(warning(disable : w)) +#define C4_SUPPRESS_WARNING_MSVC_POP __pragma(warning(pop)) +#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_MSVC_PUSH \ + C4_SUPPRESS_WARNING_MSVC(w) +#else // C4_MSVC +#define C4_SUPPRESS_WARNING_MSVC_PUSH +#define C4_SUPPRESS_WARNING_MSVC(w) +#define C4_SUPPRESS_WARNING_MSVC_POP +#define C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(w) +#endif // C4_MSVC + + +#ifdef C4_CLANG +#define C4_PRAGMA_TO_STR(x) _Pragma(#x) +#define C4_SUPPRESS_WARNING_CLANG_PUSH _Pragma("clang diagnostic push") +#define C4_SUPPRESS_WARNING_CLANG(w) C4_PRAGMA_TO_STR(clang diagnostic ignored w) +#define C4_SUPPRESS_WARNING_CLANG_POP _Pragma("clang diagnostic pop") +#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_CLANG_PUSH \ + C4_SUPPRESS_WARNING_CLANG(w) +#else // C4_CLANG +#define C4_SUPPRESS_WARNING_CLANG_PUSH +#define C4_SUPPRESS_WARNING_CLANG(w) +#define C4_SUPPRESS_WARNING_CLANG_POP +#define C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w) +#endif // C4_CLANG + + +#ifdef C4_GCC +#define C4_PRAGMA_TO_STR(x) _Pragma(#x) +#define C4_SUPPRESS_WARNING_GCC_PUSH _Pragma("GCC diagnostic push") +#define C4_SUPPRESS_WARNING_GCC(w) C4_PRAGMA_TO_STR(GCC diagnostic ignored w) +#define C4_SUPPRESS_WARNING_GCC_POP _Pragma("GCC diagnostic pop") +#define C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w) \ + C4_SUPPRESS_WARNING_GCC_PUSH \ + C4_SUPPRESS_WARNING_GCC(w) +#else // C4_GCC +#define C4_SUPPRESS_WARNING_GCC_PUSH +#define C4_SUPPRESS_WARNING_GCC(w) +#define C4_SUPPRESS_WARNING_GCC_POP +#define 
C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w)
+#endif // C4_GCC
+
+
+#define C4_SUPPRESS_WARNING_GCC_CLANG_PUSH \
+    C4_SUPPRESS_WARNING_GCC_PUSH \
+    C4_SUPPRESS_WARNING_CLANG_PUSH
+
+#define C4_SUPPRESS_WARNING_GCC_CLANG(w) \
+    C4_SUPPRESS_WARNING_GCC(w) \
+    C4_SUPPRESS_WARNING_CLANG(w)
+
+#define C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH(w) \
+    C4_SUPPRESS_WARNING_GCC_WITH_PUSH(w) \
+    C4_SUPPRESS_WARNING_CLANG_WITH_PUSH(w)
+
+#define C4_SUPPRESS_WARNING_GCC_CLANG_POP \
+    C4_SUPPRESS_WARNING_GCC_POP \
+    C4_SUPPRESS_WARNING_CLANG_POP
+
+} // namespace c4
+
+#ifdef __clang__
+#   pragma clang diagnostic pop
+#endif
+
+#endif /* _C4_ERROR_HPP_ */
+
+
+// (end https://github.com/biojppm/c4core/src/c4/error.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/memory_util.hpp
+// https://github.com/biojppm/c4core/src/c4/memory_util.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_MEMORY_UTIL_HPP_
+#define _C4_MEMORY_UTIL_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/config.hpp
+//#include "c4/config.hpp"
+#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_)
+#error "amalgamate: file c4/config.hpp must have been included at this point"
+#endif /* C4_CONFIG_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/error.hpp
+//#include "c4/error.hpp"
+#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_)
+#error "amalgamate: file c4/error.hpp must have been included at this point"
+#endif /* C4_ERROR_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/compiler.hpp
+//#include "c4/compiler.hpp"
+#if !defined(C4_COMPILER_HPP_) && !defined(_C4_COMPILER_HPP_)
+#error "amalgamate: file c4/compiler.hpp must have been included at this point"
+#endif /* C4_COMPILER_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/c4core/src/c4/cpu.hpp
+//#include "c4/cpu.hpp"
+#if !defined(C4_CPU_HPP_) && !defined(_C4_CPU_HPP_)
+#error "amalgamate: file c4/cpu.hpp must have been included at this point"
+#endif /* C4_CPU_HPP_ */
+
+#ifdef C4_MSVC
+#include <intrin.h>
+#endif
+//included above:
+//#include <cstring>
+
+#if (defined(__GNUC__) && __GNUC__ >= 10) || defined(__has_builtin)
+#define _C4_USE_LSB_INTRINSIC(which) __has_builtin(which)
+#define _C4_USE_MSB_INTRINSIC(which) __has_builtin(which)
+#elif defined(C4_MSVC)
+#define _C4_USE_LSB_INTRINSIC(which) true
+#define _C4_USE_MSB_INTRINSIC(which) true
+#else
+// let's try our luck
+#define _C4_USE_LSB_INTRINSIC(which) true
+#define _C4_USE_MSB_INTRINSIC(which) true
+#endif
+
+
+/** @file memory_util.hpp Some memory utilities.
*/ + +namespace c4 { + +/** set the given memory to zero */ +C4_ALWAYS_INLINE void mem_zero(void* mem, size_t num_bytes) +{ + memset(mem, 0, num_bytes); +} +/** set the given memory to zero */ +template +C4_ALWAYS_INLINE void mem_zero(T* mem, size_t num_elms) +{ + memset(mem, 0, sizeof(T) * num_elms); +} +/** set the given memory to zero */ +template +C4_ALWAYS_INLINE void mem_zero(T* mem) +{ + memset(mem, 0, sizeof(T)); +} + +C4_ALWAYS_INLINE C4_CONST bool mem_overlaps(void const* a, void const* b, size_t sza, size_t szb) +{ + // thanks @timwynants + return (((const char*)b + szb) > a && b < ((const char*)a+sza)); +} + +void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +C4_ALWAYS_INLINE C4_CONST bool is_aligned(T *ptr, uintptr_t alignment=alignof(T)) +{ + return (uintptr_t(ptr) & (alignment - uintptr_t(1))) == uintptr_t(0); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// least significant bit + +/** @name msb Compute the least significant bit + * @note the input value must be nonzero + * @note the input type must be unsigned + */ +/** @{ */ + +// https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear +#define _c4_lsb_fallback \ + unsigned c = 0; \ + v = (v ^ (v - 1)) >> 1; /* Set v's trailing 0s to 1s and zero rest */ \ + for(; v; ++c) \ + v >>= 1; \ + return (unsigned) c + +// u8 +template +C4_CONSTEXPR14 +auto lsb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_LSB_INTRINSIC(__builtin_ctz) + // upcast to use the intrinsic, it's cheaper. + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanForward(&bit, (unsigned long)v); + return bit; + #else + _c4_lsb_fallback; + #endif + #else + return (unsigned)__builtin_ctz((unsigned)v); + #endif + #else + _c4_lsb_fallback; + #endif +} + +// u16 +template +C4_CONSTEXPR14 +auto lsb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_LSB_INTRINSIC(__builtin_ctz) + // upcast to use the intrinsic, it's cheaper. 
+ // Then remember that the upcast makes it to 31bits + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanForward(&bit, (unsigned long)v); + return bit; + #else + _c4_lsb_fallback; + #endif + #else + return (unsigned)__builtin_ctz((unsigned)v); + #endif + #else + _c4_lsb_fallback; + #endif +} + +// u32 +template +C4_CONSTEXPR14 +auto lsb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_LSB_INTRINSIC(__builtin_ctz) + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanForward(&bit, v); + return bit; + #else + _c4_lsb_fallback; + #endif + #else + return (unsigned)__builtin_ctz((unsigned)v); + #endif + #else + _c4_lsb_fallback; + #endif +} + +// u64 in 64bits +template +C4_CONSTEXPR14 +auto lsb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_LSB_INTRINSIC(__builtin_ctzl) + #if defined(C4_MSVC) + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanForward64(&bit, v); + return bit; + #else + _c4_lsb_fallback; + #endif + #else + return (unsigned)__builtin_ctzl((unsigned long)v); + #endif + #else + _c4_lsb_fallback; + #endif +} + +// u64 in 32bits +template +C4_CONSTEXPR14 +auto lsb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_LSB_INTRINSIC(__builtin_ctzll) + #if defined(C4_MSVC) + #if !defined(C4_CPU_X86) && !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanForward64(&bit, v); + return bit; + #else + _c4_lsb_fallback; + #endif + #else + return (unsigned)__builtin_ctzll((unsigned long long)v); + #endif + #else + _c4_lsb_fallback; + #endif +} + +#undef _c4_lsb_fallback + +/** @} */ + + +namespace detail { +template struct _lsb11; +template +struct _lsb11 +{ + enum : unsigned { num = _lsb11>1), num_bits+I(1), (((val>>1)&I(1))!=I(0))>::num }; +}; +template +struct _lsb11 +{ + enum : unsigned { num = num_bits }; +}; +} // namespace detail + + +/** TMP version of lsb(); this needs to be implemented with template + * meta-programming because C++11 cannot use a constexpr function with + * local variables + * @see lsb */ +template +struct lsb11 +{ + static_assert(number != 0, "lsb: number must be nonzero"); + enum : unsigned { value = detail::_lsb11::num}; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// most significant bit + + +/** @name msb Compute the most significant bit + * @note the input value must be nonzero + * @note the input type must be unsigned + */ +/** @{ */ + + +#define _c4_msb8_fallback \ + unsigned n = 0; \ + if(v & I(0xf0)) v >>= 4, n |= I(4); \ + if(v & I(0x0c)) v >>= 2, n |= I(2); \ + if(v & I(0x02)) v >>= 1, n |= I(1); \ + return n + +#define _c4_msb16_fallback \ + unsigned n = 0; \ + if(v & I(0xff00)) v >>= 8, n |= I(8); \ + if(v & I(0x00f0)) v >>= 4, n |= I(4); \ + if(v & I(0x000c)) v >>= 2, n |= I(2); \ + if(v & I(0x0002)) v >>= 1, n |= I(1); \ + return n + +#define _c4_msb32_fallback \ + unsigned n = 0; \ + if(v & I(0xffff0000)) v >>= 16, n |= 16; \ + if(v & I(0x0000ff00)) v >>= 8, n |= 8; \ + if(v & I(0x000000f0)) v >>= 4, n |= 4; \ + if(v & I(0x0000000c)) v >>= 2, n |= 2; \ 
+ if(v & I(0x00000002)) v >>= 1, n |= 1; \ + return n + +#define _c4_msb64_fallback \ + unsigned n = 0; \ + if(v & I(0xffffffff00000000)) v >>= 32, n |= I(32); \ + if(v & I(0x00000000ffff0000)) v >>= 16, n |= I(16); \ + if(v & I(0x000000000000ff00)) v >>= 8, n |= I(8); \ + if(v & I(0x00000000000000f0)) v >>= 4, n |= I(4); \ + if(v & I(0x000000000000000c)) v >>= 2, n |= I(2); \ + if(v & I(0x0000000000000002)) v >>= 1, n |= I(1); \ + return n + + +// u8 +template +C4_CONSTEXPR14 +auto msb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_MSB_INTRINSIC(__builtin_clz) + // upcast to use the intrinsic, it's cheaper. + // Then remember that the upcast makes it to 31bits + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanReverse(&bit, (unsigned long)v); + return bit; + #else + _c4_msb8_fallback; + #endif + #else + return 31u - (unsigned)__builtin_clz((unsigned)v); + #endif + #else + _c4_msb8_fallback; + #endif +} + +// u16 +template +C4_CONSTEXPR14 +auto msb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_MSB_INTRINSIC(__builtin_clz) + // upcast to use the intrinsic, it's cheaper. + // Then remember that the upcast makes it to 31bits + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanReverse(&bit, (unsigned long)v); + return bit; + #else + _c4_msb16_fallback; + #endif + #else + return 31u - (unsigned)__builtin_clz((unsigned)v); + #endif + #else + _c4_msb16_fallback; + #endif +} + +// u32 +template +C4_CONSTEXPR14 +auto msb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_MSB_INTRINSIC(__builtin_clz) + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanReverse(&bit, v); + return bit; + #else + _c4_msb32_fallback; + #endif + #else + return 31u - (unsigned)__builtin_clz((unsigned)v); + #endif + #else + _c4_msb32_fallback; + #endif +} + +// u64 in 64bits +template +C4_CONSTEXPR14 +auto msb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_MSB_INTRINSIC(__builtin_clzl) + #ifdef C4_MSVC + #if !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanReverse64(&bit, v); + return bit; + #else + _c4_msb64_fallback; + #endif + #else + return 63u - (unsigned)__builtin_clzl((unsigned long)v); + #endif + #else + _c4_msb64_fallback; + #endif +} + +// u64 in 32bits +template +C4_CONSTEXPR14 +auto msb(I v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(v != 0); + #if _C4_USE_MSB_INTRINSIC(__builtin_clzll) + #ifdef C4_MSVC + #if !defined(C4_CPU_X86) && !defined(C4_CPU_ARM64) && !defined(C4_CPU_ARM) + unsigned long bit; + _BitScanReverse64(&bit, v); + return bit; + #else + _c4_msb64_fallback; + #endif + #else + return 63u - (unsigned)__builtin_clzll((unsigned long long)v); + #endif + #else + _c4_msb64_fallback; + #endif +} + +#undef _c4_msb8_fallback +#undef _c4_msb16_fallback +#undef _c4_msb32_fallback +#undef _c4_msb64_fallback + +/** @} */ + + +namespace detail { +template struct _msb11; +template +struct _msb11< I, val, num_bits, false> +{ + enum : unsigned { num = _msb11>1), num_bits+I(1), ((val>>1)==I(0))>::num }; +}; +template +struct _msb11 +{ + 
static_assert(val == 0, "bad implementation"); + enum : unsigned { num = (unsigned)(num_bits-1) }; +}; +} // namespace detail + + +/** TMP version of msb(); this needs to be implemented with template + * meta-programming because C++11 cannot use a constexpr function with + * local variables + * @see msb */ +template +struct msb11 +{ + enum : unsigned { value = detail::_msb11::num }; +}; + + + +#undef _C4_USE_LSB_INTRINSIC +#undef _C4_USE_MSB_INTRINSIC + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// there is an implicit conversion below; it happens when E or B are +// narrower than int, and thus any operation will upcast the result to +// int, and then downcast to assign +C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wconversion") + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= base; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= base; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= base; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= base; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + B bbase = B(base); + if(exponent >= 0) + { + for(E e = 0; e < exponent; ++e) + r *= bbase; + } + else + { + exponent *= E(-1); + for(E e = 0; e < exponent; ++e) + r /= bbase; + } + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(B base, E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + for(E e = 0; e < exponent; ++e) + r *= base; + return r; +} + +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + for(E e = 0; e < exponent; ++e) + r *= base; + return r; +} +/** integer power; this function is constexpr-14 because of the local + * variables */ +template +C4_CONSTEXPR14 C4_CONST auto ipow(E exponent) noexcept -> typename std::enable_if::value, B>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + B r = B(1); + B bbase = B(base); + for(E e = 0; e < exponent; ++e) + r *= bbase; + return r; +} + +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
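+// Usage sketch (illustrative only; not part of upstream c4core): ipow()
+// computes integer powers without going through floating point. A positive
+// exponent multiplies the base into the result repeatedly, and a negative
+// exponent divides repeatedly, so results truncate toward zero. The checks
+// below are a hedged example: C4_IPOW_EXAMPLES is a hypothetical opt-in macro
+// (never defined by default), and C++14 is required so that C4_CONSTEXPR14
+// expands to constexpr.
+#if defined(C4_IPOW_EXAMPLES) && __cplusplus >= 201402L
+static_assert(ipow(2, 10) == 1024, "2 raised to the 10th power");
+static_assert(ipow(3, 0) == 1, "a zero exponent yields 1");
+static_assert(ipow(10, -2) == 0, "negative exponents divide, truncating toward zero");
+#endif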
+//----------------------------------------------------------------------------- + +/** return a mask with all bits set [first_bit,last_bit[; this function + * is constexpr-14 because of the local variables */ +template +C4_CONSTEXPR14 I contiguous_mask(I first_bit, I last_bit) +{ + I r = 0; + for(I i = first_bit; i < last_bit; ++i) + { + r |= (I(1) << i); + } + return r; +} + + +namespace detail { + +template +struct _ctgmsk11; + +template +struct _ctgmsk11< I, val, first, last, true> +{ + enum : I { value = _ctgmsk11::value }; +}; + +template +struct _ctgmsk11< I, val, first, last, false> +{ + enum : I { value = val }; +}; + +} // namespace detail + + +/** TMP version of contiguous_mask(); this needs to be implemented with template + * meta-programming because C++11 cannot use a constexpr function with + * local variables + * @see contiguous_mask */ +template +struct contiguous_mask11 +{ + enum : I { value = detail::_ctgmsk11::value }; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** use Empty Base Class Optimization to reduce the size of a pair of + * potentially empty types*/ + +namespace detail { +typedef enum { + tpc_same, + tpc_same_empty, + tpc_both_empty, + tpc_first_empty, + tpc_second_empty, + tpc_general +} TightPairCase_e; + +template +constexpr TightPairCase_e tpc_which_case() +{ + return std::is_same::value ? + std::is_empty::value ? + tpc_same_empty + : + tpc_same + : + std::is_empty::value && std::is_empty::value ? + tpc_both_empty + : + std::is_empty::value ? + tpc_first_empty + : + std::is_empty::value ? + tpc_second_empty + : + tpc_general + ; +} + +template +struct tight_pair +{ +private: + + First m_first; + Second m_second; + +public: + + using first_type = First; + using second_type = Second; + + tight_pair() : m_first(), m_second() {} + tight_pair(First const& f, Second const& s) : m_first(f), m_second(s) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return m_first; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return m_first; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return m_second; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second const& second() const { return m_second; } +}; + +template +struct tight_pair : public First +{ + static_assert(std::is_same::value, "bad implementation"); + + using first_type = First; + using second_type = Second; + + tight_pair() : First() {} + tight_pair(First const& f, Second const& /*s*/) : First(f) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return reinterpret_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second const& second() const { return reinterpret_cast(*this); } +}; + +template +struct tight_pair : public First, public Second +{ + using first_type = First; + using second_type = Second; + + tight_pair() : First(), Second() {} + tight_pair(First const& f, Second const& s) : First(f), Second(s) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second 
const& second() const { return static_cast(*this); } +}; + +template +struct tight_pair : public First +{ + Second m_second; + + using first_type = First; + using second_type = Second; + + tight_pair() : First() {} + tight_pair(First const& f, Second const& s) : First(f), m_second(s) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return m_second; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second const& second() const { return m_second; } +}; + +template +struct tight_pair : public First +{ + Second m_second; + + using first_type = First; + using second_type = Second; + + tight_pair() : First(), m_second() {} + tight_pair(First const& f, Second const& s) : First(f), m_second(s) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return m_second; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second const& second() const { return m_second; } +}; + +template +struct tight_pair : public Second +{ + First m_first; + + using first_type = First; + using second_type = Second; + + tight_pair() : Second(), m_first() {} + tight_pair(First const& f, Second const& s) : Second(s), m_first(f) {} + + C4_ALWAYS_INLINE C4_CONSTEXPR14 First & first () { return m_first; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 First const& first () const { return m_first; } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second & second() { return static_cast(*this); } + C4_ALWAYS_INLINE C4_CONSTEXPR14 Second const& second() const { return static_cast(*this); } +}; + +} // namespace detail + +template +using tight_pair = detail::tight_pair()>; + +} // namespace c4 + +#endif /* _C4_MEMORY_UTIL_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/memory_util.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/memory_resource.hpp +// https://github.com/biojppm/c4core/src/c4/memory_resource.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_MEMORY_RESOURCE_HPP_ +#define _C4_MEMORY_RESOURCE_HPP_ + +/** @file memory_resource.hpp Provides facilities to allocate typeless + * memory, via the memory resource model consecrated with C++17. 
*/ + +/** @defgroup memory memory utilities */ + +/** @defgroup raw_memory_alloc Raw memory allocation + * @ingroup memory + */ + +/** @defgroup memory_resources Memory resources + * @ingroup memory + */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +namespace c4 { + +// need these forward decls here +struct MemoryResource; +struct MemoryResourceMalloc; +struct MemoryResourceStack; +MemoryResourceMalloc* get_memory_resource_malloc(); +MemoryResourceStack* get_memory_resource_stack(); +namespace detail { MemoryResource*& get_memory_resource(); } + + +// c-style allocation --------------------------------------------------------- + +// this API provides aligned allocation functions. +// These functions forward the call to a user-modifiable function. + + +// aligned allocation. + +/** Aligned allocation. Merely calls the current get_aalloc() function. + * @see get_aalloc() + * @ingroup raw_memory_alloc */ +void* aalloc(size_t sz, size_t alignment); + +/** Aligned free. Merely calls the current get_afree() function. + * @see get_afree() + * @ingroup raw_memory_alloc */ +void afree(void* ptr); + +/** Aligned reallocation. Merely calls the current get_arealloc() function. + * @see get_arealloc() + * @ingroup raw_memory_alloc */ +void* arealloc(void* ptr, size_t oldsz, size_t newsz, size_t alignment); + + +// allocation setup facilities. + +/** Function pointer type for aligned allocation + * @see set_aalloc() + * @ingroup raw_memory_alloc */ +using aalloc_pfn = void* (*)(size_t size, size_t alignment); + +/** Function pointer type for aligned deallocation + * @see set_afree() + * @ingroup raw_memory_alloc */ +using afree_pfn = void (*)(void *ptr); + +/** Function pointer type for aligned reallocation + * @see set_arealloc() + * @ingroup raw_memory_alloc */ +using arealloc_pfn = void* (*)(void *ptr, size_t oldsz, size_t newsz, size_t alignment); + + +// allocation function pointer setters/getters + +/** Set the global aligned allocation function. + * @see aalloc() + * @see get_aalloc() + * @ingroup raw_memory_alloc */ +void set_aalloc(aalloc_pfn fn); + +/** Set the global aligned deallocation function. + * @see afree() + * @see get_afree() + * @ingroup raw_memory_alloc */ +void set_afree(afree_pfn fn); + +/** Set the global aligned reallocation function. + * @see arealloc() + * @see get_arealloc() + * @ingroup raw_memory_alloc */ +void set_arealloc(arealloc_pfn fn); + + +/** Get the global aligned reallocation function. + * @see arealloc() + * @ingroup raw_memory_alloc */ +aalloc_pfn get_aalloc(); + +/** Get the global aligned deallocation function. + * @see afree() + * @ingroup raw_memory_alloc */ +afree_pfn get_afree(); + +/** Get the global aligned reallocation function. 
+ * @see arealloc() + * @ingroup raw_memory_alloc */ +arealloc_pfn get_arealloc(); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// c++-style allocation ------------------------------------------------------- + +/** C++17-style memory_resource base class. See http://en.cppreference.com/w/cpp/experimental/memory_resource + * @ingroup memory_resources */ +struct MemoryResource +{ + const char *name = nullptr; + virtual ~MemoryResource() {} + + void* allocate(size_t sz, size_t alignment=alignof(max_align_t), void *hint=nullptr) + { + void *mem = this->do_allocate(sz, alignment, hint); + C4_CHECK_MSG(mem != nullptr, "could not allocate %lu bytes", sz); + return mem; + } + + void* reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment=alignof(max_align_t)) + { + void *mem = this->do_reallocate(ptr, oldsz, newsz, alignment); + C4_CHECK_MSG(mem != nullptr, "could not reallocate from %lu to %lu bytes", oldsz, newsz); + return mem; + } + + void deallocate(void* ptr, size_t sz, size_t alignment=alignof(max_align_t)) + { + this->do_deallocate(ptr, sz, alignment); + } + +protected: + + virtual void* do_allocate(size_t sz, size_t alignment, void* hint) = 0; + virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) = 0; + virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) = 0; + +}; + +/** get the current global memory resource. To avoid static initialization + * order problems, this is implemented using a function call to ensure + * that it is available when first used. + * @ingroup memory_resources */ +C4_ALWAYS_INLINE MemoryResource* get_memory_resource() +{ + return detail::get_memory_resource(); +} + +/** set the global memory resource + * @ingroup memory_resources */ +C4_ALWAYS_INLINE void set_memory_resource(MemoryResource* mr) +{ + C4_ASSERT(mr != nullptr); + detail::get_memory_resource() = mr; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A c4::aalloc-based memory resource. Thread-safe if the implementation + * called by c4::aalloc() is safe. 
+ * @ingroup memory_resources */ +struct MemoryResourceMalloc : public MemoryResource +{ + + MemoryResourceMalloc() { name = "malloc"; } + virtual ~MemoryResourceMalloc() override {} + +protected: + + virtual void* do_allocate(size_t sz, size_t alignment, void *hint) override + { + C4_UNUSED(hint); + return c4::aalloc(sz, alignment); + } + + virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) override + { + C4_UNUSED(sz); + C4_UNUSED(alignment); + c4::afree(ptr); + } + + virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) override + { + return c4::arealloc(ptr, oldsz, newsz, alignment); + } + +}; + +/** returns a malloc-based memory resource + * @ingroup memory_resources */ +C4_ALWAYS_INLINE MemoryResourceMalloc* get_memory_resource_malloc() +{ + /** @todo use a nifty counter: + * https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Nifty_Counter */ + static MemoryResourceMalloc mr; + return &mr; +} + +namespace detail { +C4_ALWAYS_INLINE MemoryResource* & get_memory_resource() +{ + /** @todo use a nifty counter: + * https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Nifty_Counter */ + thread_local static MemoryResource* mr = get_memory_resource_malloc(); + return mr; +} +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + +/** Allows a memory resource to obtain its memory from another memory resource. + * @ingroup memory_resources */ +struct DerivedMemoryResource : public MemoryResource +{ +public: + + DerivedMemoryResource(MemoryResource *mr_=nullptr) : m_local(mr_ ? 
mr_ : get_memory_resource()) {} + +private: + + MemoryResource *m_local; + +protected: + + virtual void* do_allocate(size_t sz, size_t alignment, void* hint) override + { + return m_local->allocate(sz, alignment, hint); + } + + virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) override + { + return m_local->reallocate(ptr, oldsz, newsz, alignment); + } + + virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) override + { + return m_local->deallocate(ptr, sz, alignment); + } +}; + +/** Provides common facilities for memory resource consisting of a single memory block + * @ingroup memory_resources */ +struct _MemoryResourceSingleChunk : public DerivedMemoryResource +{ + + C4_NO_COPY_OR_MOVE(_MemoryResourceSingleChunk); + + using impl_type = DerivedMemoryResource; + +public: + + _MemoryResourceSingleChunk(MemoryResource *impl=nullptr) : DerivedMemoryResource(impl) { name = "linear_malloc"; } + + /** initialize with owned memory, allocated from the given (or the global) memory resource */ + _MemoryResourceSingleChunk(size_t sz, MemoryResource *impl=nullptr) : _MemoryResourceSingleChunk(impl) { acquire(sz); } + /** initialize with borrowed memory */ + _MemoryResourceSingleChunk(void *mem, size_t sz) : _MemoryResourceSingleChunk() { acquire(mem, sz); } + + virtual ~_MemoryResourceSingleChunk() override { release(); } + +public: + + void const* mem() const { return m_mem; } + + size_t capacity() const { return m_size; } + size_t size() const { return m_pos; } + size_t slack() const { C4_ASSERT(m_size >= m_pos); return m_size - m_pos; } + +public: + + char *m_mem{nullptr}; + size_t m_size{0}; + size_t m_pos{0}; + bool m_owner; + +public: + + /** set the internal pointer to the beginning of the linear buffer */ + void clear() { m_pos = 0; } + + /** initialize with owned memory, allocated from the global memory resource */ + void acquire(size_t sz); + /** initialize with borrowed memory */ + void acquire(void *mem, size_t sz); + /** release the memory */ + void release(); + +}; + +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** provides a linear memory resource. Allocates incrementally from a linear + * buffer, without ever deallocating. Deallocations are a no-op, and the + * memory is freed only when the resource is release()d. The memory used by + * this object can be either owned or borrowed. When borrowed, no calls to + * malloc/free take place. + * + * @ingroup memory_resources */ +struct MemoryResourceLinear : public detail::_MemoryResourceSingleChunk +{ + + C4_NO_COPY_OR_MOVE(MemoryResourceLinear); + +public: + + using detail::_MemoryResourceSingleChunk::_MemoryResourceSingleChunk; + +protected: + + virtual void* do_allocate(size_t sz, size_t alignment, void *hint) override; + virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) override; + virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) override; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** provides a stack-type malloc-based memory resource. 
+ * @ingroup memory_resources */
+struct MemoryResourceStack : public detail::_MemoryResourceSingleChunk
+{
+
+    C4_NO_COPY_OR_MOVE(MemoryResourceStack);
+
+public:
+
+    using detail::_MemoryResourceSingleChunk::_MemoryResourceSingleChunk;
+
+protected:
+
+    virtual void* do_allocate(size_t sz, size_t alignment, void *hint) override;
+    virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) override;
+    virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) override;
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+/** provides a linear array-based memory resource.
+ * @see MemoryResourceLinear
+ * @ingroup memory_resources */
+template<size_t N>
+struct MemoryResourceLinearArr : public MemoryResourceLinear
+{
+    #ifdef _MSC_VER
+    #pragma warning(push)
+    #pragma warning(disable: 4324) // structure was padded due to alignment specifier
+    #endif
+    alignas(alignof(max_align_t)) char m_arr[N];
+    #ifdef _MSC_VER
+    #pragma warning(pop)
+    #endif
+    MemoryResourceLinearArr() : MemoryResourceLinear(m_arr, N) { name = "linear_arr"; }
+};
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+struct AllocationCounts
+{
+    struct Item
+    {
+        ssize_t allocs;
+        ssize_t size;
+
+        void add(size_t sz)
+        {
+            ++allocs;
+            size += static_cast<ssize_t>(sz);
+        }
+        void rem(size_t sz)
+        {
+            --allocs;
+            size -= static_cast<ssize_t>(sz);
+        }
+        Item max(Item const& that) const
+        {
+            Item r(*this);
+            r.allocs = r.allocs > that.allocs ? r.allocs : that.allocs;
+            r.size = r.size > that.size ?
r.size : that.size; + return r; + } + }; + + Item curr = {0, 0}; + Item total = {0, 0}; + Item max = {0, 0}; + + void clear_counts() + { + curr = {0, 0}; + total = {0, 0}; + max = {0, 0}; + } + + void update(AllocationCounts const& that) + { + curr.allocs += that.curr.allocs; + curr.size += that.curr.size; + total.allocs += that.total.allocs; + total.size += that.total.size; + max.allocs += that.max.allocs; + max.size += that.max.size; + } + + void add_counts(void* ptr, size_t sz) + { + if(ptr == nullptr) return; + curr.add(sz); + total.add(sz); + max = max.max(curr); + } + + void rem_counts(void *ptr, size_t sz) + { + if(ptr == nullptr) return; + curr.rem(sz); + } + + AllocationCounts operator- (AllocationCounts const& that) const + { + AllocationCounts r(*this); + r.curr.allocs -= that.curr.allocs; + r.curr.size -= that.curr.size; + r.total.allocs -= that.total.allocs; + r.total.size -= that.total.size; + r.max.allocs -= that.max.allocs; + r.max.size -= that.max.size; + return r; + } + + AllocationCounts operator+ (AllocationCounts const& that) const + { + AllocationCounts r(*this); + r.curr.allocs += that.curr.allocs; + r.curr.size += that.curr.size; + r.total.allocs += that.total.allocs; + r.total.size += that.total.size; + r.max.allocs += that.max.allocs; + r.max.size += that.max.size; + return r; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** a MemoryResource which latches onto another MemoryResource + * and counts allocations and sizes. + * @ingroup memory_resources */ +class MemoryResourceCounts : public MemoryResource +{ +public: + + MemoryResourceCounts() : m_resource(get_memory_resource()) + { + C4_ASSERT(m_resource != this); + name = "MemoryResourceCounts"; + } + MemoryResourceCounts(MemoryResource *res) : m_resource(res) + { + C4_ASSERT(m_resource != this); + name = "MemoryResourceCounts"; + } + + MemoryResource *resource() { return m_resource; } + AllocationCounts const& counts() const { return m_counts; } + +protected: + + MemoryResource *m_resource; + AllocationCounts m_counts; + +protected: + + virtual void* do_allocate(size_t sz, size_t alignment, void * /*hint*/) override + { + void *ptr = m_resource->allocate(sz, alignment); + m_counts.add_counts(ptr, sz); + return ptr; + } + + virtual void do_deallocate(void* ptr, size_t sz, size_t alignment) override + { + m_counts.rem_counts(ptr, sz); + m_resource->deallocate(ptr, sz, alignment); + } + + virtual void* do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) override + { + m_counts.rem_counts(ptr, oldsz); + void* nptr = m_resource->reallocate(ptr, oldsz, newsz, alignment); + m_counts.add_counts(nptr, newsz); + return nptr; + } + +}; + +//----------------------------------------------------------------------------- +/** RAII class which binds a memory resource with a scope duration. + * @ingroup memory_resources */ +struct ScopedMemoryResource +{ + MemoryResource *m_original; + + ScopedMemoryResource(MemoryResource *r) + : + m_original(get_memory_resource()) + { + set_memory_resource(r); + } + + ~ScopedMemoryResource() + { + set_memory_resource(m_original); + } +}; + +//----------------------------------------------------------------------------- +/** RAII class which counts allocations and frees inside a scope. Can + * optionally set also the memory resource to be used. 
+ * @ingroup memory_resources */ +struct ScopedMemoryResourceCounts +{ + MemoryResourceCounts mr; + + ScopedMemoryResourceCounts() : mr() + { + set_memory_resource(&mr); + } + ScopedMemoryResourceCounts(MemoryResource *m) : mr(m) + { + set_memory_resource(&mr); + } + ~ScopedMemoryResourceCounts() + { + set_memory_resource(mr.resource()); + } +}; + +} // namespace c4 + +#endif /* _C4_MEMORY_RESOURCE_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/memory_resource.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/ctor_dtor.hpp +// https://github.com/biojppm/c4core/src/c4/ctor_dtor.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_CTOR_DTOR_HPP_ +#define _C4_CTOR_DTOR_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/preprocessor.hpp +//#include "c4/preprocessor.hpp" +#if !defined(C4_PREPROCESSOR_HPP_) && !defined(_C4_PREPROCESSOR_HPP_) +#error "amalgamate: file c4/preprocessor.hpp must have been included at this point" +#endif /* C4_PREPROCESSOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_util.hpp +//#include "c4/memory_util.hpp" +#if !defined(C4_MEMORY_UTIL_HPP_) && !defined(_C4_MEMORY_UTIL_HPP_) +#error "amalgamate: file c4/memory_util.hpp must have been included at this point" +#endif /* C4_MEMORY_UTIL_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +//included above: +//#include +//included above: +//#include // std::forward + +/** @file ctor_dtor.hpp object construction and destruction facilities. + * Some of these are not yet available in C++11. */ + +namespace c4 { + +/** default-construct an object, trivial version */ +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +construct(U *ptr) noexcept +{ + memset(ptr, 0, sizeof(U)); +} +/** default-construct an object, non-trivial version */ +template C4_ALWAYS_INLINE typename std ::enable_if< ! std::is_trivially_default_constructible::value, void>::type +construct(U* ptr) noexcept +{ + new ((void*)ptr) U(); +} + +/** default-construct n objects, trivial version */ +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +construct_n(U* ptr, I n) noexcept +{ + memset(ptr, 0, n * sizeof(U)); +} +/** default-construct n objects, non-trivial version */ +template C4_ALWAYS_INLINE typename std::enable_if< ! 
std::is_trivially_default_constructible::value, void>::type +construct_n(U* ptr, I n) noexcept +{ + for(I i = 0; i < n; ++i) + { + new ((void*)(ptr + i)) U(); + } +} + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +template +inline void construct(U* ptr, Args&&... args) +{ + new ((void*)ptr) U(std::forward(args)...); +} +template +inline void construct_n(U* ptr, I n, Args&&... args) +{ + for(I i = 0; i < n; ++i) + { + new ((void*)(ptr + i)) U(args...); + } +} + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +//----------------------------------------------------------------------------- +// copy-construct + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_construct(U* dst, U const* src) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_copy_constructible::value, void>::type +copy_construct(U* dst, U const* src) +{ + C4_ASSERT(dst != src); + new ((void*)dst) U(*src); +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_construct_n(U* dst, U const* src, I n) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, n * sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_copy_constructible::value, void>::type +copy_construct_n(U* dst, U const* src, I n) +{ + C4_ASSERT(dst != src); + for(I i = 0; i < n; ++i) + { + new ((void*)(dst + i)) U(*(src + i)); + } +} + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_construct(U* dst, U src) noexcept // pass by value for scalar types +{ + *dst = src; +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_scalar::value, void>::type +copy_construct(U* dst, U const& src) // pass by reference for non-scalar types +{ + C4_ASSERT(dst != &src); + new ((void*)dst) U(src); +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_construct_n(U* dst, U src, I n) noexcept // pass by value for scalar types +{ + for(I i = 0; i < n; ++i) + { + dst[i] = src; + } +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_scalar::value, void>::type +copy_construct_n(U* dst, U const& src, I n) // pass by reference for non-scalar types +{ + C4_ASSERT(dst != &src); + for(I i = 0; i < n; ++i) + { + new ((void*)(dst + i)) U(src); + } +} + +template +C4_ALWAYS_INLINE void copy_construct(U (&dst)[N], U const (&src)[N]) noexcept +{ + copy_construct_n(dst, src, N); +} + +//----------------------------------------------------------------------------- +// copy-assign + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_assign(U* dst, U const* src) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_copy_assignable::value, void>::type +copy_assign(U* dst, U const* src) noexcept +{ + C4_ASSERT(dst != src); + *dst = *src; +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_assign_n(U* dst, U const* src, I n) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, n * sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! 
std::is_trivially_copy_assignable::value, void>::type +copy_assign_n(U* dst, U const* src, I n) noexcept +{ + C4_ASSERT(dst != src); + for(I i = 0; i < n; ++i) + { + dst[i] = src[i]; + } +} + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_assign(U* dst, U src) noexcept // pass by value for scalar types +{ + *dst = src; +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_scalar::value, void>::type +copy_assign(U* dst, U const& src) noexcept // pass by reference for non-scalar types +{ + C4_ASSERT(dst != &src); + *dst = src; +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +copy_assign_n(U* dst, U src, I n) noexcept // pass by value for scalar types +{ + for(I i = 0; i < n; ++i) + { + dst[i] = src; + } +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_scalar::value, void>::type +copy_assign_n(U* dst, U const& src, I n) noexcept // pass by reference for non-scalar types +{ + C4_ASSERT(dst != &src); + for(I i = 0; i < n; ++i) + { + dst[i] = src; + } +} + +template +C4_ALWAYS_INLINE void copy_assign(U (&dst)[N], U const (&src)[N]) noexcept +{ + copy_assign_n(dst, src, N); +} + +//----------------------------------------------------------------------------- +// move-construct + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +move_construct(U* dst, U* src) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_move_constructible::value, void>::type +move_construct(U* dst, U* src) noexcept +{ + C4_ASSERT(dst != src); + new ((void*)dst) U(std::move(*src)); +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +move_construct_n(U* dst, U* src, I n) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, n * sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_move_constructible::value, void>::type +move_construct_n(U* dst, U* src, I n) noexcept +{ + C4_ASSERT(dst != src); + for(I i = 0; i < n; ++i) + { + new ((void*)(dst + i)) U(std::move(src[i])); + } +} + +//----------------------------------------------------------------------------- +// move-assign + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +move_assign(U* dst, U* src) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_move_assignable::value, void>::type +move_assign(U* dst, U* src) noexcept +{ + C4_ASSERT(dst != src); + *dst = std::move(*src); +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +move_assign_n(U* dst, U* src, I n) noexcept +{ + C4_ASSERT(dst != src); + memcpy(dst, src, n * sizeof(U)); +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_move_assignable::value, void>::type +move_assign_n(U* dst, U* src, I n) noexcept +{ + C4_ASSERT(dst != src); + for(I i = 0; i < n; ++i) + { + *(dst + i) = std::move(*(src + i)); + } +} + +//----------------------------------------------------------------------------- +// destroy + +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +destroy(U* ptr) noexcept +{ + C4_UNUSED(ptr); // nothing to do +} +template C4_ALWAYS_INLINE typename std::enable_if< ! 
std::is_trivially_destructible::value, void>::type +destroy(U* ptr) noexcept +{ + ptr->~U(); +} +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +destroy_n(U* ptr, I n) noexcept +{ + C4_UNUSED(ptr); + C4_UNUSED(n); // nothing to do +} +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_destructible::value, void>::type +destroy_n(U* ptr, I n) noexcept +{ + for(I i = 0; i C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +make_room(U *buf, I bufsz, I room) C4_NOEXCEPT_A +{ + C4_ASSERT(bufsz >= 0 && room >= 0); + if(room >= bufsz) + { + memcpy (buf + room, buf, bufsz * sizeof(U)); + } + else + { + memmove(buf + room, buf, bufsz * sizeof(U)); + } +} +/** makes room at the beginning of buf, which has a current size of bufsz */ +template C4_ALWAYS_INLINE typename std::enable_if< ! std::is_trivially_move_constructible::value, void>::type +make_room(U *buf, I bufsz, I room) C4_NOEXCEPT_A +{ + C4_ASSERT(bufsz >= 0 && room >= 0); + if(room >= bufsz) + { + for(I i = 0; i < bufsz; ++i) + { + new ((void*)(buf + (i + room))) U(std::move(buf[i])); + } + } + else + { + for(I i = 0; i < bufsz; ++i) + { + I w = bufsz-1 - i; // do a backwards loop + new ((void*)(buf + (w + room))) U(std::move(buf[w])); + } + } +} + +/** make room to the right of pos */ +template +C4_ALWAYS_INLINE void make_room(U *buf, I bufsz, I currsz, I pos, I room) +{ + C4_ASSERT(pos >= 0 && pos <= currsz); + C4_ASSERT(currsz <= bufsz); + C4_ASSERT(room + currsz <= bufsz); + C4_UNUSED(bufsz); + make_room(buf + pos, currsz - pos, room); +} + + +/** make room to the right of pos, copying to the beginning of a different buffer */ +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +make_room(U *dst, U const* src, I srcsz, I room, I pos) C4_NOEXCEPT_A +{ + C4_ASSERT(srcsz >= 0 && room >= 0 && pos >= 0); + C4_ASSERT(pos < srcsz || (pos == 0 && srcsz == 0)); + memcpy(dst , src , pos * sizeof(U)); + memcpy(dst + room + pos, src + pos, (srcsz - pos) * sizeof(U)); +} +/** make room to the right of pos, copying to the beginning of a different buffer */ +template C4_ALWAYS_INLINE typename std::enable_if< ! 
std::is_trivially_move_constructible::value, void>::type +make_room(U *dst, U const* src, I srcsz, I room, I pos) +{ + C4_ASSERT(srcsz >= 0 && room >= 0 && pos >= 0); + C4_ASSERT(pos < srcsz || (pos == 0 && srcsz == 0)); + for(I i = 0; i < pos; ++i) + { + new ((void*)(dst + i)) U(std::move(src[i])); + } + src += pos; + dst += room + pos; + for(I i = 0, e = srcsz - pos; i < e; ++i) + { + new ((void*)(dst + i)) U(std::move(src[i])); + } +} + +template +C4_ALWAYS_INLINE void make_room +( + U * dst, I dstsz, + U const* src, I srcsz, + I room, I pos +) +{ + C4_ASSERT(pos >= 0 && pos < srcsz || (srcsz == 0 && pos == 0)); + C4_ASSERT(pos >= 0 && pos < dstsz || (dstsz == 0 && pos == 0)); + C4_ASSERT(srcsz+room <= dstsz); + C4_UNUSED(dstsz); + make_room(dst, src, srcsz, room, pos); +} + + +//----------------------------------------------------------------------------- +/** destroy room at the beginning of buf, which has a current size of n */ +template C4_ALWAYS_INLINE typename std::enable_if::value || (std::is_standard_layout::value && std::is_trivial::value), void>::type +destroy_room(U *buf, I n, I room) C4_NOEXCEPT_A +{ + C4_ASSERT(n >= 0 && room >= 0); + C4_ASSERT(room <= n); + if(room < n) + { + memmove(buf, buf + room, (n - room) * sizeof(U)); + } + else + { + // nothing to do - no need to destroy scalar types + } +} +/** destroy room at the beginning of buf, which has a current size of n */ +template C4_ALWAYS_INLINE typename std::enable_if< ! (std::is_scalar::value || (std::is_standard_layout::value && std::is_trivial::value)), void>::type +destroy_room(U *buf, I n, I room) +{ + C4_ASSERT(n >= 0 && room >= 0); + C4_ASSERT(room <= n); + if(room < n) + { + for(I i = 0, e = n - room; i < e; ++i) + { + buf[i] = std::move(buf[i + room]); + } + } + else + { + for(I i = 0; i < n; ++i) + { + buf[i].~U(); + } + } +} + +/** destroy room to the right of pos, copying to a different buffer */ +template C4_ALWAYS_INLINE typename std::enable_if::value, void>::type +destroy_room(U *dst, U const* src, I n, I room, I pos) C4_NOEXCEPT_A +{ + C4_ASSERT(n >= 0 && room >= 0 && pos >= 0); + C4_ASSERT(pos C4_ALWAYS_INLINE typename std::enable_if< ! 
std::is_trivially_move_constructible::value, void>::type +destroy_room(U *dst, U const* src, I n, I room, I pos) +{ + C4_ASSERT(n >= 0 && room >= 0 && pos >= 0); + C4_ASSERT(pos < n); + C4_ASSERT(pos + room <= n); + for(I i = 0; i < pos; ++i) + { + new ((void*)(dst + i)) U(std::move(src[i])); + } + src += room + pos; + dst += pos; + for(I i = 0, e = n - pos - room; i < e; ++i) + { + new ((void*)(dst + i)) U(std::move(src[i])); + } +} + +} // namespace c4 + +#undef _C4REQUIRE + +#endif /* _C4_CTOR_DTOR_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/ctor_dtor.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/allocator.hpp +// https://github.com/biojppm/c4core/src/c4/allocator.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_ALLOCATOR_HPP_ +#define _C4_ALLOCATOR_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_resource.hpp +//#include "c4/memory_resource.hpp" +#if !defined(C4_MEMORY_RESOURCE_HPP_) && !defined(_C4_MEMORY_RESOURCE_HPP_) +#error "amalgamate: file c4/memory_resource.hpp must have been included at this point" +#endif /* C4_MEMORY_RESOURCE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/ctor_dtor.hpp +//#include "c4/ctor_dtor.hpp" +#if !defined(C4_CTOR_DTOR_HPP_) && !defined(_C4_CTOR_DTOR_HPP_) +#error "amalgamate: file c4/ctor_dtor.hpp must have been included at this point" +#endif /* C4_CTOR_DTOR_HPP_ */ + + +#include // std::allocator_traits +//included above: +//#include + +/** @file allocator.hpp Contains classes to make typeful allocations (note + * that memory resources are typeless) */ + +/** @defgroup mem_res_providers Memory resource providers + * @brief Policy classes which provide a memory resource for + * use in an allocator. + * @ingroup memory + */ + +/** @defgroup allocators Allocators + * @brief Lightweight classes that act as handles to specific memory + * resources and provide typeful memory. + * @ingroup memory + */ + +namespace c4 { + +namespace detail { +template inline size_t size_for (size_t num_objs) noexcept { return num_objs * sizeof(T); } +template< > inline size_t size_for(size_t num_objs) noexcept { return num_objs; } +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** provides a per-allocator memory resource + * @ingroup mem_res_providers */ +class MemRes +{ +public: + + MemRes() : m_resource(get_memory_resource()) {} + MemRes(MemoryResource* r) noexcept : m_resource(r ? 
r : get_memory_resource()) {} + + inline MemoryResource* resource() const { return m_resource; } + +private: + + MemoryResource* m_resource; + +}; + + +/** the allocators using this will default to the global memory resource + * @ingroup mem_res_providers */ +class MemResGlobal +{ +public: + + MemResGlobal() {} + MemResGlobal(MemoryResource* r) noexcept { C4_UNUSED(r); C4_ASSERT(r == get_memory_resource()); } + + inline MemoryResource* resource() const { return get_memory_resource(); } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { +template +struct _AllocatorUtil; + +template +struct has_no_alloc + : public std::integral_constant::value) + && std::is_constructible::value> {}; + +// std::uses_allocator_v && std::is_constructible +// ie can construct(std::allocator_arg_t, MemoryResource*, Args...) +template +struct has_alloc_arg + : public std::integral_constant::value + && std::is_constructible::value> {}; +// std::uses_allocator && std::is_constructible +// ie, can construct(Args..., MemoryResource*) +template +struct has_alloc + : public std::integral_constant::value + && std::is_constructible::value> {}; + +} // namespace detail + + +template +struct detail::_AllocatorUtil : public MemRes +{ + using MemRes::MemRes; + + /** for construct: + * @see http://en.cppreference.com/w/cpp/experimental/polymorphic_allocator/construct */ + + // 1. types with no allocators + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct(U *ptr, Args &&...args) + { + c4::construct(ptr, std::forward(args)...); + } + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct_n(U* ptr, I n, Args&&... args) + { + c4::construct_n(ptr, n, std::forward(args)...); + } + + // 2. types using allocators (ie, containers) + + // 2.1. can construct(std::allocator_arg_t, MemoryResource*, Args...) + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct(U* ptr, Args&&... args) + { + c4::construct(ptr, std::allocator_arg, this->resource(), std::forward(args)...); + } + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct_n(U* ptr, I n, Args&&... args) + { + c4::construct_n(ptr, n, std::allocator_arg, this->resource(), std::forward(args)...); + } + + // 2.2. can construct(Args..., MemoryResource*) + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct(U* ptr, Args&&... args) + { + c4::construct(ptr, std::forward(args)..., this->resource()); + } + template + C4_ALWAYS_INLINE typename std::enable_if::value, void>::type + construct_n(U* ptr, I n, Args&&... args) + { + c4::construct_n(ptr, n, std::forward(args)..., this->resource()); + } + + template + static C4_ALWAYS_INLINE void destroy(U* ptr) + { + c4::destroy(ptr); + } + template + static C4_ALWAYS_INLINE void destroy_n(U* ptr, I n) + { + c4::destroy_n(ptr, n); + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** An allocator is simply a proxy to a memory resource. 
+ * @param T + * @param MemResProvider + * @ingroup allocators */ +template +class Allocator : public detail::_AllocatorUtil +{ +public: + + using impl_type = detail::_AllocatorUtil; + + using value_type = T; + using pointer = T*; + using const_pointer = T const*; + using reference = T&; + using const_reference = T const&; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assigment = std::true_type; + +public: + + template + bool operator== (Allocator const& that) const + { + return this->resource() == that.resource(); + } + template + bool operator!= (Allocator const& that) const + { + return this->resource() != that.resource(); + } + +public: + + template friend class Allocator; + template + struct rebind + { + using other = Allocator; + }; + template + typename rebind::other rebound() + { + return typename rebind::other(*this); + } + +public: + + using impl_type::impl_type; + Allocator() : impl_type() {} // VS demands this + + template Allocator(Allocator const& that) : impl_type(that.resource()) {} + + Allocator(Allocator const&) = default; + Allocator(Allocator &&) = default; + + Allocator& operator= (Allocator const&) = default; // WTF? why? @see http://en.cppreference.com/w/cpp/memory/polymorphic_allocator + Allocator& operator= (Allocator &&) = default; + + /** returns a default-constructed polymorphic allocator object + * @see http://en.cppreference.com/w/cpp/memory/polymorphic_allocator/select_on_container_copy_construction */ + Allocator select_on_container_copy_construct() const { return Allocator(*this); } + + T* allocate(size_t num_objs, size_t alignment=alignof(T)) + { + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment >= alignof(T)); + void* vmem = this->resource()->allocate(detail::size_for(num_objs), alignment); + T* mem = static_cast(vmem); + return mem; + } + + void deallocate(T * ptr, size_t num_objs, size_t alignment=alignof(T)) + { + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment>= alignof(T)); + this->resource()->deallocate(ptr, detail::size_for(num_objs), alignment); + } + + T* reallocate(T* ptr, size_t oldnum, size_t newnum, size_t alignment=alignof(T)) + { + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment >= alignof(T)); + void* vmem = this->resource()->reallocate(ptr, detail::size_for(oldnum), detail::size_for(newnum), alignment); + T* mem = static_cast(vmem); + return mem; + } + +}; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @ingroup allocators */ +template +class SmallAllocator : public detail::_AllocatorUtil +{ + static_assert(Alignment >= alignof(T), "invalid alignment"); + + using impl_type = detail::_AllocatorUtil; + + alignas(Alignment) char m_arr[N * sizeof(T)]; + size_t m_num{0}; + +public: + + using value_type = T; + using pointer = T*; + using const_pointer = T const*; + using reference = T&; + using const_reference = T const&; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using propagate_on_container_move_assigment = std::true_type; + + template + bool operator== (SmallAllocator const&) const + { + return false; + } + template + bool operator!= (SmallAllocator const&) const + { + return true; + } + +public: + + template friend class SmallAllocator; + template + struct rebind + { + using other = SmallAllocator; + }; + 
template + typename rebind::other rebound() + { + return typename rebind::other(*this); + } + +public: + + using impl_type::impl_type; + SmallAllocator() : impl_type() {} // VS demands this + + template + SmallAllocator(SmallAllocator const& that) : impl_type(that.resource()) + { + C4_ASSERT(that.m_num == 0); + } + + SmallAllocator(SmallAllocator const&) = default; + SmallAllocator(SmallAllocator &&) = default; + + SmallAllocator& operator= (SmallAllocator const&) = default; // WTF? why? @see http://en.cppreference.com/w/cpp/memory/polymorphic_allocator + SmallAllocator& operator= (SmallAllocator &&) = default; + + /** returns a default-constructed polymorphic allocator object + * @see http://en.cppreference.com/w/cpp/memory/polymorphic_allocator/select_on_container_copy_construction */ + SmallAllocator select_on_container_copy_construct() const { return SmallAllocator(*this); } + + T* allocate(size_t num_objs, size_t alignment=Alignment) + { + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment >= alignof(T)); + void *vmem; + if(m_num + num_objs <= N) + { + vmem = (m_arr + m_num * sizeof(T)); + } + else + { + vmem = this->resource()->allocate(num_objs * sizeof(T), alignment); + } + m_num += num_objs; + T *mem = static_cast(vmem); + return mem; + } + + void deallocate(T * ptr, size_t num_objs, size_t alignment=Alignment) + { + C4_ASSERT(m_num >= num_objs); + m_num -= num_objs; + if((char*)ptr >= m_arr && (char*)ptr < m_arr + (N * sizeof(T))) + { + return; + } + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment >= alignof(T)); + this->resource()->deallocate(ptr, num_objs * sizeof(T), alignment); + } + + T* reallocate(T * ptr, size_t oldnum, size_t newnum, size_t alignment=Alignment) + { + C4_ASSERT(this->resource() != nullptr); + C4_ASSERT(alignment >= alignof(T)); + if(oldnum <= N && newnum <= N) + { + return m_arr; + } + else if(oldnum <= N && newnum > N) + { + return allocate(newnum, alignment); + } + else if(oldnum > N && newnum <= N) + { + deallocate(ptr, oldnum, alignment); + return m_arr; + } + void* vmem = this->resource()->reallocate(ptr, oldnum * sizeof(T), newnum * sizeof(T), alignment); + T* mem = static_cast(vmem); + return mem; + } + +}; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** An allocator making use of the global memory resource. 
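+ *
+ * Hedged example (not from the upstream header): because the alias models the
+ * standard allocator requirements, it should be usable with standard
+ * containers, e.g. with <vector> included:
+ * @code
+ * std::vector<int, c4::allocator<int>> v; // storage comes from the global memory resource
+ * v.push_back(42);
+ * @endcode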
+ * @ingroup allocators */ +template using allocator = Allocator; +/** An allocator with a per-instance memory resource + * @ingroup allocators */ +template using allocator_mr = Allocator; + +/** @ingroup allocators */ +template using small_allocator = SmallAllocator; +/** @ingroup allocators */ +template using small_allocator_mr = SmallAllocator; + +} // namespace c4 + +#endif /* _C4_ALLOCATOR_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/allocator.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/char_traits.hpp +// https://github.com/biojppm/c4core/src/c4/char_traits.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_CHAR_TRAITS_HPP_ +#define _C4_CHAR_TRAITS_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + + +#include // needed because of std::char_traits +#include +#include + +namespace c4 { + +C4_ALWAYS_INLINE bool isspace(char c) { return std::isspace(c) != 0; } +C4_ALWAYS_INLINE bool isspace(wchar_t c) { return std::iswspace(static_cast(c)) != 0; } + +//----------------------------------------------------------------------------- +template +struct char_traits; + +template<> +struct char_traits : public std::char_traits +{ + constexpr static const char whitespace_chars[] = " \f\n\r\t\v"; + constexpr static const size_t num_whitespace_chars = sizeof(whitespace_chars) - 1; +}; + +template<> +struct char_traits : public std::char_traits +{ + constexpr static const wchar_t whitespace_chars[] = L" \f\n\r\t\v"; + constexpr static const size_t num_whitespace_chars = sizeof(whitespace_chars) - 1; +}; + + +//----------------------------------------------------------------------------- +namespace detail { +template +struct needed_chars; +template<> +struct needed_chars +{ + template + C4_ALWAYS_INLINE constexpr static SizeType for_bytes(SizeType num_bytes) + { + return num_bytes; + } +}; +template<> +struct needed_chars +{ + template + C4_ALWAYS_INLINE constexpr static SizeType for_bytes(SizeType num_bytes) + { + // wchar_t is not necessarily 2 bytes. + return (num_bytes / static_cast(sizeof(wchar_t))) + ((num_bytes & static_cast(SizeType(sizeof(wchar_t)) - SizeType(1))) != 0); + } +}; +} // namespace detail + +/** get the number of C characters needed to store a number of bytes */ +template +C4_ALWAYS_INLINE constexpr SizeType num_needed_chars(SizeType num_bytes) +{ + return detail::needed_chars::for_bytes(num_bytes); +} + + +//----------------------------------------------------------------------------- + +/** get the given text string as either char or wchar_t according to the given type */ +#define C4_TXTTY(txt, type) \ + /* is there a smarter way to do this? 
*/\ + c4::detail::literal_as::get(txt, C4_WIDEN(txt)) + +namespace detail { +template +struct literal_as; + +template<> +struct literal_as +{ + C4_ALWAYS_INLINE static constexpr const char* get(const char* str, const wchar_t *) + { + return str; + } +}; +template<> +struct literal_as +{ + C4_ALWAYS_INLINE static constexpr const wchar_t* get(const char*, const wchar_t *wstr) + { + return wstr; + } +}; +} // namespace detail + +} // namespace c4 + +#endif /* _C4_CHAR_TRAITS_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/char_traits.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/hash.hpp +// https://github.com/biojppm/c4core/src/c4/hash.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_HASH_HPP_ +#define _C4_HASH_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +#include + +/** @file hash.hpp */ + +/** @defgroup hash Hash utils + * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ + +namespace c4 { + +namespace detail { + +/** @internal + * @ingroup hash + * @see this was taken a great answer in stackoverflow: + * https://stackoverflow.com/a/34597785/5875572 + * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ +template +class basic_fnv1a final +{ + + static_assert(std::is_unsigned::value, "need unsigned integer"); + +public: + + using result_type = ResultT; + +private: + + result_type state_ {}; + +public: + + C4_CONSTEXPR14 basic_fnv1a() noexcept : state_ {OffsetBasis} {} + + C4_CONSTEXPR14 void update(const void *const data, const size_t size) noexcept + { + auto cdata = static_cast(data); + auto acc = this->state_; + for(size_t i = 0; i < size; ++i) + { + const auto next = size_t(cdata[i]); + acc = (acc ^ next) * Prime; + } + this->state_ = acc; + } + + C4_CONSTEXPR14 result_type digest() const noexcept + { + return this->state_; + } + +}; + +using fnv1a_32 = basic_fnv1a; +using fnv1a_64 = basic_fnv1a; + +template struct fnv1a; +template<> struct fnv1a<32> { using type = fnv1a_32; }; +template<> struct fnv1a<64> { using type = fnv1a_64; }; + +} // namespace detail + + +/** @ingroup hash */ +template +using fnv1a_t = typename detail::fnv1a::type; + + +/** @ingroup hash */ +C4_CONSTEXPR14 inline size_t hash_bytes(const void *const data, const size_t size) noexcept +{ + fnv1a_t fn{}; + fn.update(data, size); + return fn.digest(); +} + +/** + * @overload hash_bytes + * @ingroup hash */ +template +C4_CONSTEXPR14 inline size_t hash_bytes(const char (&str)[N]) noexcept +{ + fnv1a_t fn{}; + fn.update(str, N); + return fn.digest(); +} + +} // namespace c4 + + +#endif // _C4_HASH_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/hash.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/szconv.hpp +// https://github.com/biojppm/c4core/src/c4/szconv.hpp +//-------------------------------------------------------------------------------- 
+//******************************************************************************** + +#ifndef _C4_SZCONV_HPP_ +#define _C4_SZCONV_HPP_ + +/** @file szconv.hpp utilities to deal safely with narrowing conversions */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +#include + +namespace c4 { + +/** @todo this would be so much easier with calls to numeric_limits::max()... */ +template +struct is_narrower_size : std::conditional +< + (std::is_signed::value == std::is_signed::value) + ? + (sizeof(SizeOut) < sizeof(SizeIn)) + : + ( + (sizeof(SizeOut) < sizeof(SizeIn)) + || + ( + (sizeof(SizeOut) == sizeof(SizeIn)) + && + (std::is_signed::value && std::is_unsigned::value) + ) + ), + std::true_type, + std::false_type +>::type +{ + static_assert(std::is_integral::value, "must be integral type"); + static_assert(std::is_integral::value, "must be integral type"); +}; + + +/** when SizeOut is wider than SizeIn, assignment can occur without reservations */ +template +C4_ALWAYS_INLINE +typename std::enable_if< ! is_narrower_size::value, SizeOut>::type +szconv(SizeIn sz) noexcept +{ + return static_cast(sz); +} + +/** when SizeOut is narrower than SizeIn, narrowing will occur, so we check + * for overflow. Note that this check is done only if C4_XASSERT is enabled. + * @see C4_XASSERT */ +template +C4_ALWAYS_INLINE +typename std::enable_if::value, SizeOut>::type +szconv(SizeIn sz) C4_NOEXCEPT_X +{ + C4_XASSERT(sz >= 0); + C4_XASSERT_MSG((SizeIn)sz <= (SizeIn)std::numeric_limits::max(), "size conversion overflow: in=%zu", (size_t)sz); + SizeOut szo = static_cast(sz); + return szo; +} + +} // namespace c4 + +#endif /* _C4_SZCONV_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/szconv.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/blob.hpp +// https://github.com/biojppm/c4core/src/c4/blob.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_BLOB_HPP_ +#define _C4_BLOB_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/types.hpp +//#include "c4/types.hpp" +#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) +#error "amalgamate: file c4/types.hpp must have been included at this point" +#endif /* C4_TYPES_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +/** @file blob.hpp Mutable and immutable binary data blobs. 
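+ *
+ * Illustrative sketch only (added here for clarity; assumes the blob_
+ * interface declared below):
+ * @code
+ * int payload[4] = {1, 2, 3, 4};
+ * c4::blob  b = payload;     // mutable view: b.len == sizeof(payload)
+ * c4::cblob cb(payload, 4);  // immutable view over the same 4 ints
+ * @endcode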
+*/ + +namespace c4 { + +template +struct blob_ +{ + T * buf; + size_t len; + + C4_ALWAYS_INLINE blob_() noexcept : buf(), len() {} + + C4_ALWAYS_INLINE blob_(blob_ const& that) noexcept = default; + C4_ALWAYS_INLINE blob_(blob_ && that) noexcept = default; + C4_ALWAYS_INLINE blob_& operator=(blob_ && that) noexcept = default; + C4_ALWAYS_INLINE blob_& operator=(blob_ const& that) noexcept = default; + + // need to sfinae out copy constructors! (why? isn't the above sufficient?) + #define _C4_REQUIRE_NOT_SAME class=typename std::enable_if<( ! std::is_same::value) && ( ! std::is_pointer::value), T>::type + template C4_ALWAYS_INLINE blob_(U &var) noexcept : buf(reinterpret_cast(&var)), len(sizeof(U)) {} + template C4_ALWAYS_INLINE blob_& operator= (U &var) noexcept { buf = reinterpret_cast(&var); len = sizeof(U); return *this; } + #undef _C4_REQUIRE_NOT_SAME + + template C4_ALWAYS_INLINE blob_(U (&arr)[N]) noexcept : buf(reinterpret_cast(arr)), len(sizeof(U) * N) {} + template C4_ALWAYS_INLINE blob_& operator= (U (&arr)[N]) noexcept { buf = reinterpret_cast(arr); len = sizeof(U) * N; return *this; } + + template + C4_ALWAYS_INLINE blob_(U *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(sizeof(U) * n) { C4_ASSERT(is_aligned(ptr)); } + C4_ALWAYS_INLINE blob_(void *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(n) {} + C4_ALWAYS_INLINE blob_(void const *ptr, size_t n) noexcept : buf(reinterpret_cast(ptr)), len(n) {} +}; + +/** an immutable binary blob */ +using cblob = blob_; +/** a mutable binary blob */ +using blob = blob_< byte>; + +C4_MUST_BE_TRIVIAL_COPY(blob); +C4_MUST_BE_TRIVIAL_COPY(cblob); + +} // namespace c4 + +#endif // _C4_BLOB_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/blob.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/substr_fwd.hpp +// https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_SUBSTR_FWD_HPP_ +#define _C4_SUBSTR_FWD_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/export.hpp +//#include "c4/export.hpp" +#if !defined(C4_EXPORT_HPP_) && !defined(_C4_EXPORT_HPP_) +#error "amalgamate: file c4/export.hpp must have been included at this point" +#endif /* C4_EXPORT_HPP_ */ + + +namespace c4 { + +#ifndef DOXYGEN +template struct basic_substring; +using csubstr = C4CORE_EXPORT basic_substring; +using substr = C4CORE_EXPORT basic_substring; +#endif // !DOXYGEN + +} // namespace c4 + +#endif /* _C4_SUBSTR_FWD_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/substr.hpp +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_SUBSTR_HPP_ +#define _C4_SUBSTR_HPP_ + +/** @file substr.hpp read+write string views */ + +//included above: +//#include +//included above: +//#include +//included above: +//#include + +// amalgamate: removed include of +// 
https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp +//#include "c4/substr_fwd.hpp" +#if !defined(C4_SUBSTR_FWD_HPP_) && !defined(_C4_SUBSTR_FWD_HPP_) +#error "amalgamate: file c4/substr_fwd.hpp must have been included at this point" +#endif /* C4_SUBSTR_FWD_HPP_ */ + + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // disable warnings on size_t>=0, used heavily in assertions below. These assertions are a preparation step for providing the index type as a template parameter. +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif + + +namespace c4 { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + +template +static inline void _do_reverse(C *C4_RESTRICT first, C *C4_RESTRICT last) +{ + while(last > first) + { + C tmp = *last; + *last-- = *first; + *first++ = tmp; + } +} + +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// utility macros to deuglify SFINAE code; undefined after the class. +// https://stackoverflow.com/questions/43051882/how-to-disable-a-class-member-funrtion-for-certain-template-types +#define C4_REQUIRE_RW(ret_type) \ + template \ + typename std::enable_if< ! std::is_const::value, ret_type>::type +// non-const-to-const +#define C4_NC2C(ty) \ + typename std::enable_if::value && ( ! std::is_const::value), ty>::type + + +/** a non-owning string-view, consisting of a character pointer + * and a length. + * + * @note The pointer is explicitly restricted. + * @note Because of a C++ limitation, there cannot coexist overloads for + * constructing from a char[N] and a char*; the latter will always be chosen + * by the compiler. To construct an object of this type, call to_substr() or + * to_csubstr(). 
For a more detailed explanation on why the overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html + * + * @see to_substr() + * @see to_csubstr() + */ +template +struct C4CORE_EXPORT basic_substring +{ +public: + + /** a restricted pointer to the first character of the substring */ + C * C4_RESTRICT str; + /** the length of the substring */ + size_t len; + +public: + + /** @name Types */ + /** @{ */ + + using CC = typename std::add_const::type; //!< CC=const char + using NCC_ = typename std::remove_const::type; //!< NCC_=non const char + + using ro_substr = basic_substring; + using rw_substr = basic_substring; + + using char_type = C; + using size_type = size_t; + + using iterator = C*; + using const_iterator = CC*; + + enum : size_t { npos = (size_t)-1, NONE = (size_t)-1 }; + + /// convert automatically to substring of const C + operator ro_substr () const { ro_substr s(str, len); return s; } + + /** @} */ + +public: + + /** @name Default construction and assignment */ + /** @{ */ + + constexpr basic_substring() : str(nullptr), len(0) {} + + constexpr basic_substring(basic_substring const&) = default; + constexpr basic_substring(basic_substring &&) = default; + constexpr basic_substring(std::nullptr_t) : str(nullptr), len(0) {} + + basic_substring& operator= (basic_substring const&) = default; + basic_substring& operator= (basic_substring &&) = default; + basic_substring& operator= (std::nullptr_t) { str = nullptr; len = 0; return *this; } + + /** @} */ + +public: + + /** @name Construction and assignment from characters with the same type */ + /** @{ */ + + //basic_substring(C *s_) : str(s_), len(s_ ? strlen(s_) : 0) {} + /** the overload for receiving a single C* pointer will always + * hide the array[N] overload. So it is disabled. If you want to + * construct a substr from a single pointer containing a C-style string, + * you can call c4::to_substr()/c4::to_csubstr(). + * @see c4::to_substr() + * @see c4::to_csubstr() */ + template + constexpr basic_substring(C (&s_)[N]) noexcept : str(s_), len(N-1) {} + basic_substring(C *s_, size_t len_) : str(s_), len(len_) { C4_ASSERT(str || !len_); } + basic_substring(C *beg_, C *end_) : str(beg_), len(static_cast(end_ - beg_)) { C4_ASSERT(end_ >= beg_); } + + //basic_substring& operator= (C *s_) { this->assign(s_); return *this; } + template + basic_substring& operator= (C (&s_)[N]) { this->assign(s_); return *this; } + + //void assign(C *s_) { str = (s_); len = (s_ ? strlen(s_) : 0); } + /** the overload for receiving a single C* pointer will always + * hide the array[N] overload. So it is disabled. If you want to + * construct a substr from a single pointer containing a C-style string, + * you can call c4::to_substr()/c4::to_csubstr(). 
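+ * Hedged example (added for illustration; assumes c4::to_csubstr() is part of
+ * this amalgamation):
+ * @code
+ * c4::csubstr a = "literal";          // array overload: len == 7, no strlen()
+ * const char *p = "pointer";
+ * c4::csubstr b = c4::to_csubstr(p);  // a bare pointer must go through to_csubstr()
+ * @endcode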
+ * @see c4::to_substr() + * @see c4::to_csubstr() */ + template + void assign(C (&s_)[N]) { str = (s_); len = (N-1); } + void assign(C *s_, size_t len_) { str = s_; len = len_; C4_ASSERT(str || !len_); } + void assign(C *beg_, C *end_) { C4_ASSERT(end_ >= beg_); str = (beg_); len = (end_ - beg_); } + + void clear() { str = nullptr; len = 0; } + + /** @} */ + +public: + + /** @name Construction from non-const characters */ + /** @{ */ + + // when the char type is const, allow construction and assignment from non-const chars + + /** only available when the char type is const */ + template explicit basic_substring(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; } + /** only available when the char type is const */ + template< class U=NCC_> basic_substring(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; } + /** only available when the char type is const */ + template< class U=NCC_> basic_substring(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_; } + + /** only available when the char type is const */ + template void assign(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; } + /** only available when the char type is const */ + template< class U=NCC_> void assign(C4_NC2C(U) *s_, size_t len_) { str = s_; len = len_; } + /** only available when the char type is const */ + template< class U=NCC_> void assign(C4_NC2C(U) *beg_, C4_NC2C(U) *end_) { C4_ASSERT(end_ >= beg_); str = beg_; len = end_ - beg_; } + + /** only available when the char type is const */ + template + basic_substring& operator=(C4_NC2C(U) (&s_)[N]) { str = s_; len = N-1; return *this; } + + /** @} */ + +public: + + /** @name Standard accessor methods */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool has_str() const noexcept { return ! empty() && str[0] != C(0); } + C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { return (len == 0 || str == nullptr); } + C4_ALWAYS_INLINE C4_PURE bool not_empty() const noexcept { return (len != 0 && str != nullptr); } + C4_ALWAYS_INLINE C4_PURE size_t size() const noexcept { return len; } + + C4_ALWAYS_INLINE C4_PURE iterator begin() noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE iterator end () noexcept { return str + len; } + + C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE const_iterator end () const noexcept { return str + len; } + + C4_ALWAYS_INLINE C4_PURE C * data() noexcept { return str; } + C4_ALWAYS_INLINE C4_PURE C const* data() const noexcept { return str; } + + C4_ALWAYS_INLINE C4_PURE C & operator[] (size_t i) noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; } + C4_ALWAYS_INLINE C4_PURE C const& operator[] (size_t i) const noexcept { C4_ASSERT(i >= 0 && i < len); return str[i]; } + + C4_ALWAYS_INLINE C4_PURE C & front() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; } + C4_ALWAYS_INLINE C4_PURE C const& front() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *str; } + + C4_ALWAYS_INLINE C4_PURE C & back() noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } + C4_ALWAYS_INLINE C4_PURE C const& back() const noexcept { C4_ASSERT(len > 0 && str != nullptr); return *(str + len - 1); } + + /** @} */ + +public: + + /** @name Comparison methods */ + /** @{ */ + + C4_PURE int compare(C const c) const noexcept + { + C4_XASSERT((str != nullptr) || len == 0); + if(C4_LIKELY(str != nullptr && len > 0)) + return (*str != c) ? 
*str - c : (static_cast(len) - 1); + else + return -1; + } + + C4_PURE int compare(const char *C4_RESTRICT that, size_t sz) const noexcept + { + C4_XASSERT(that || sz == 0); + C4_XASSERT(str || len == 0); + if(C4_LIKELY(str && that)) + { + { + const size_t min = len < sz ? len : sz; + for(size_t i = 0; i < min; ++i) + if(str[i] != that[i]) + return str[i] < that[i] ? -1 : 1; + } + if(len < sz) + return -1; + else if(len == sz) + return 0; + else + return 1; + } + else if(len == sz) + { + C4_XASSERT(len == 0 && sz == 0); + return 0; + } + return len < sz ? -1 : 1; + } + + C4_ALWAYS_INLINE C4_PURE int compare(ro_substr const that) const noexcept { return this->compare(that.str, that.len); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return str == nullptr; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return str != nullptr; } + + C4_ALWAYS_INLINE C4_PURE bool operator== (C const c) const noexcept { return this->compare(c) == 0; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (C const c) const noexcept { return this->compare(c) != 0; } + C4_ALWAYS_INLINE C4_PURE bool operator< (C const c) const noexcept { return this->compare(c) < 0; } + C4_ALWAYS_INLINE C4_PURE bool operator> (C const c) const noexcept { return this->compare(c) > 0; } + C4_ALWAYS_INLINE C4_PURE bool operator<= (C const c) const noexcept { return this->compare(c) <= 0; } + C4_ALWAYS_INLINE C4_PURE bool operator>= (C const c) const noexcept { return this->compare(c) >= 0; } + + template C4_ALWAYS_INLINE C4_PURE bool operator== (basic_substring const that) const noexcept { return this->compare(that) == 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator!= (basic_substring const that) const noexcept { return this->compare(that) != 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator< (basic_substring const that) const noexcept { return this->compare(that) < 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator> (basic_substring const that) const noexcept { return this->compare(that) > 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator<= (basic_substring const that) const noexcept { return this->compare(that) <= 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator>= (basic_substring const that) const noexcept { return this->compare(that) >= 0; } + + template C4_ALWAYS_INLINE C4_PURE bool operator== (const char (&that)[N]) const noexcept { return this->compare(that, N-1) == 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator!= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) != 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator< (const char (&that)[N]) const noexcept { return this->compare(that, N-1) < 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator> (const char (&that)[N]) const noexcept { return this->compare(that, N-1) > 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator<= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) <= 0; } + template C4_ALWAYS_INLINE C4_PURE bool operator>= (const char (&that)[N]) const noexcept { return this->compare(that, N-1) >= 0; } + + /** @} */ + +public: + + /** @name Sub-selection methods */ + /** @{ */ + + /** true if *this is a substring of that (ie, from the same buffer) */ + C4_ALWAYS_INLINE C4_PURE bool is_sub(ro_substr const that) const noexcept + { + return that.is_super(*this); + } + + /** true if that is a substring of *this (ie, from the same buffer) */ + C4_ALWAYS_INLINE C4_PURE bool is_super(ro_substr const that) const noexcept + { + 
if(C4_LIKELY(len > 0)) + return that.str >= str && that.str+that.len <= str+len; + else + return that.len == 0 && that.str == str && str != nullptr; + } + + /** true if there is overlap of at least one element between that and *this */ + C4_ALWAYS_INLINE C4_PURE bool overlaps(ro_substr const that) const noexcept + { + // thanks @timwynants + return that.str+that.len > str && that.str < str+len; + } + +public: + + /** return [first,len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first) const noexcept + { + C4_ASSERT(first >= 0 && first <= len); + return basic_substring(str + first, len - first); + } + + /** return [first,first+num[. If num==npos, return [first,len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring sub(size_t first, size_t num) const noexcept + { + C4_ASSERT(first >= 0 && first <= len); + C4_ASSERT((num >= 0 && num <= len) || (num == npos)); + size_t rnum = num != npos ? num : len - first; + C4_ASSERT((first >= 0 && first + rnum <= len) || (num == 0)); + return basic_substring(str + first, rnum); + } + + /** return [first,last[. If last==npos, return [first,len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring range(size_t first, size_t last=npos) const noexcept + { + C4_ASSERT(first >= 0 && first <= len); + last = last != npos ? last : len; + C4_ASSERT(first <= last); + C4_ASSERT(last >= 0 && last <= len); + return basic_substring(str + first, last - first); + } + + /** return the first @p num elements: [0,num[*/ + C4_ALWAYS_INLINE C4_PURE basic_substring first(size_t num) const noexcept + { + C4_ASSERT(num <= len || num == npos); + return basic_substring(str, num != npos ? num : len); + } + + /** return the last @num elements: [len-num,len[*/ + C4_ALWAYS_INLINE C4_PURE basic_substring last(size_t num) const noexcept + { + C4_ASSERT(num <= len || num == npos); + return num != npos ? + basic_substring(str + len - num, num) : + *this; + } + + /** offset from the ends: return [left,len-right[ ; ie, trim a + number of characters from the left and right. This is + equivalent to python's negative list indices. */ + C4_ALWAYS_INLINE C4_PURE basic_substring offs(size_t left, size_t right) const noexcept + { + C4_ASSERT(left >= 0 && left <= len); + C4_ASSERT(right >= 0 && right <= len); + C4_ASSERT(left <= len - right + 1); + return basic_substring(str + left, len - right - left); + } + + /** return [0, pos[ . Same as .first(pos), but provided for compatibility with .right_of() */ + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str, pos) : + *this; + } + + /** return [0, pos+include_pos[ . Same as .first(pos+1), but provided for compatibility with .right_of() */ + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(size_t pos, bool include_pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str, pos+include_pos) : + *this; + } + + /** return [pos+1, len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? + basic_substring(str + (pos + 1), len - (pos + 1)) : + basic_substring(str + len, size_t(0)); + } + + /** return [pos+!include_pos, len[ */ + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(size_t pos, bool include_pos) const noexcept + { + C4_ASSERT(pos <= len || pos == npos); + return (pos != npos) ? 
+ basic_substring(str + (pos + !include_pos), len - (pos + !include_pos)) : + basic_substring(str + len, size_t(0)); + } + +public: + + /** given @p subs a substring of the current string, get the + * portion of the current string to the left of it */ + C4_ALWAYS_INLINE C4_PURE basic_substring left_of(ro_substr const subs) const noexcept + { + C4_ASSERT(is_super(subs) || subs.empty()); + auto ssb = subs.begin(); + auto b = begin(); + auto e = end(); + if(ssb >= b && ssb <= e) + return sub(0, static_cast(ssb - b)); + else + return sub(0, 0); + } + + /** given @p subs a substring of the current string, get the + * portion of the current string to the right of it */ + C4_ALWAYS_INLINE C4_PURE basic_substring right_of(ro_substr const subs) const noexcept + { + C4_ASSERT(is_super(subs) || subs.empty()); + auto sse = subs.end(); + auto b = begin(); + auto e = end(); + if(sse >= b && sse <= e) + return sub(static_cast(sse - b), static_cast(e - sse)); + else + return sub(0, 0); + } + + /** @} */ + +public: + + /** @name Removing characters (trim()) / patterns (strip()) from the tips of the string */ + /** @{ */ + + /** trim left */ + basic_substring triml(const C c) const + { + if( ! empty()) + { + size_t pos = first_not_of(c); + if(pos != npos) + return sub(pos); + } + return sub(0, 0); + } + /** trim left ANY of the characters. + * @see stripl() to remove a pattern from the left */ + basic_substring triml(ro_substr chars) const + { + if( ! empty()) + { + size_t pos = first_not_of(chars); + if(pos != npos) + return sub(pos); + } + return sub(0, 0); + } + + /** trim the character c from the right */ + basic_substring trimr(const C c) const + { + if( ! empty()) + { + size_t pos = last_not_of(c, npos); + if(pos != npos) + return sub(0, pos+1); + } + return sub(0, 0); + } + /** trim right ANY of the characters + * @see stripr() to remove a pattern from the right */ + basic_substring trimr(ro_substr chars) const + { + if( ! empty()) + { + size_t pos = last_not_of(chars, npos); + if(pos != npos) + return sub(0, pos+1); + } + return sub(0, 0); + } + + /** trim the character c left and right */ + basic_substring trim(const C c) const + { + return triml(c).trimr(c); + } + /** trim left and right ANY of the characters + * @see strip() to remove a pattern from the left and right */ + basic_substring trim(ro_substr const chars) const + { + return triml(chars).trimr(chars); + } + + /** remove a pattern from the left + * @see triml() to remove characters*/ + basic_substring stripl(ro_substr pattern) const + { + if( ! begins_with(pattern)) + return *this; + return sub(pattern.len < len ? pattern.len : len); + } + + /** remove a pattern from the right + * @see trimr() to remove characters*/ + basic_substring stripr(ro_substr pattern) const + { + if( ! ends_with(pattern)) + return *this; + return left_of(len - (pattern.len < len ? 
pattern.len : len)); + } + + /** @} */ + +public: + + /** @name Lookup methods */ + /** @{ */ + + inline size_t find(const C c, size_t start_pos=0) const + { + return first_of(c, start_pos); + } + inline size_t find(ro_substr pattern, size_t start_pos=0) const + { + C4_ASSERT(start_pos == npos || (start_pos >= 0 && start_pos <= len)); + if(len < pattern.len) return npos; + for(size_t i = start_pos, e = len - pattern.len + 1; i < e; ++i) + { + bool gotit = true; + for(size_t j = 0; j < pattern.len; ++j) + { + C4_ASSERT(i + j < len); + if(str[i + j] != pattern.str[j]) + { + gotit = false; + break; + } + } + if(gotit) + { + return i; + } + } + return npos; + } + +public: + + /** count the number of occurrences of c */ + inline size_t count(const C c, size_t pos=0) const + { + C4_ASSERT(pos >= 0 && pos <= len); + size_t num = 0; + pos = find(c, pos); + while(pos != npos) + { + ++num; + pos = find(c, pos + 1); + } + return num; + } + + /** count the number of occurrences of s */ + inline size_t count(ro_substr c, size_t pos=0) const + { + C4_ASSERT(pos >= 0 && pos <= len); + size_t num = 0; + pos = find(c, pos); + while(pos != npos) + { + ++num; + pos = find(c, pos + c.len); + } + return num; + } + + /** get the substr consisting of the first occurrence of @p c after @p pos, or an empty substr if none occurs */ + inline basic_substring select(const C c, size_t pos=0) const + { + pos = find(c, pos); + return pos != npos ? sub(pos, 1) : basic_substring(); + } + + /** get the substr consisting of the first occurrence of @p pattern after @p pos, or an empty substr if none occurs */ + inline basic_substring select(ro_substr pattern, size_t pos=0) const + { + pos = find(pattern, pos); + return pos != npos ? sub(pos, pattern.len) : basic_substring(); + } + +public: + + struct first_of_any_result + { + size_t which; + size_t pos; + inline operator bool() const { return which != NONE && pos != npos; } + }; + + first_of_any_result first_of_any(ro_substr s0, ro_substr s1) const + { + ro_substr s[2] = {s0, s1}; + return first_of_any_iter(&s[0], &s[0] + 2); + } + + first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2) const + { + ro_substr s[3] = {s0, s1, s2}; + return first_of_any_iter(&s[0], &s[0] + 3); + } + + first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3) const + { + ro_substr s[4] = {s0, s1, s2, s3}; + return first_of_any_iter(&s[0], &s[0] + 4); + } + + first_of_any_result first_of_any(ro_substr s0, ro_substr s1, ro_substr s2, ro_substr s3, ro_substr s4) const + { + ro_substr s[5] = {s0, s1, s2, s3, s4}; + return first_of_any_iter(&s[0], &s[0] + 5); + } + + template + first_of_any_result first_of_any_iter(It first_span, It last_span) const + { + for(size_t i = 0; i < len; ++i) + { + size_t curr = 0; + for(It it = first_span; it != last_span; ++curr, ++it) + { + auto const& chars = *it; + if((i + chars.len) > len) continue; + bool gotit = true; + for(size_t j = 0; j < chars.len; ++j) + { + C4_ASSERT(i + j < len); + if(str[i + j] != chars[j]) + { + gotit = false; + break; + } + } + if(gotit) + { + return {curr, i}; + } + } + } + return {NONE, npos}; + } + +public: + + /** true if the first character of the string is @p c */ + bool begins_with(const C c) const + { + return len > 0 ? 
str[0] == c : false; + } + + /** true if the first @p num characters of the string are @p c */ + bool begins_with(const C c, size_t num) const + { + if(len < num) + { + return false; + } + for(size_t i = 0; i < num; ++i) + { + if(str[i] != c) + { + return false; + } + } + return true; + } + + /** true if the string begins with the given @p pattern */ + bool begins_with(ro_substr pattern) const + { + if(len < pattern.len) + { + return false; + } + for(size_t i = 0; i < pattern.len; ++i) + { + if(str[i] != pattern[i]) + { + return false; + } + } + return true; + } + + /** true if the first character of the string is any of the given @p chars */ + bool begins_with_any(ro_substr chars) const + { + if(len == 0) + { + return false; + } + for(size_t i = 0; i < chars.len; ++i) + { + if(str[0] == chars.str[i]) + { + return true; + } + } + return false; + } + + /** true if the last character of the string is @p c */ + bool ends_with(const C c) const + { + return len > 0 ? str[len-1] == c : false; + } + + /** true if the last @p num characters of the string are @p c */ + bool ends_with(const C c, size_t num) const + { + if(len < num) + { + return false; + } + for(size_t i = len - num; i < len; ++i) + { + if(str[i] != c) + { + return false; + } + } + return true; + } + + /** true if the string ends with the given @p pattern */ + bool ends_with(ro_substr pattern) const + { + if(len < pattern.len) + { + return false; + } + for(size_t i = 0, s = len-pattern.len; i < pattern.len; ++i) + { + if(str[s+i] != pattern[i]) + { + return false; + } + } + return true; + } + + /** true if the last character of the string is any of the given @p chars */ + bool ends_with_any(ro_substr chars) const + { + if(len == 0) + { + return false; + } + for(size_t i = 0; i < chars.len; ++i) + { + if(str[len - 1] == chars[i]) + { + return true; + } + } + return false; + } + +public: + + /** @return the first position where c is found in the string, or npos if none is found */ + size_t first_of(const C c, size_t start=0) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + for(size_t i = start; i < len; ++i) + { + if(str[i] == c) + return i; + } + return npos; + } + + /** @return the last position where c is found in the string, or npos if none is found */ + size_t last_of(const C c, size_t start=npos) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + if(start == npos) + start = len; + for(size_t i = start-1; i != size_t(-1); --i) + { + if(str[i] == c) + return i; + } + return npos; + } + + /** @return the first position where ANY of the chars is found in the string, or npos if none is found */ + size_t first_of(ro_substr chars, size_t start=0) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + for(size_t i = start; i < len; ++i) + { + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars[j]) + return i; + } + } + return npos; + } + + /** @return the last position where ANY of the chars is found in the string, or npos if none is found */ + size_t last_of(ro_substr chars, size_t start=npos) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + if(start == npos) + start = len; + for(size_t i = start-1; i != size_t(-1); --i) + { + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars[j]) + return i; + } + } + return npos; + } + +public: + + size_t first_not_of(const C c, size_t start=0) const + { + C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0)); + for(size_t i = start; i < len; ++i) + { + if(str[i] != c) + return 
i; + } + return npos; + } + + size_t last_not_of(const C c, size_t start=npos) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + if(start == npos) + start = len; + for(size_t i = start-1; i != size_t(-1); --i) + { + if(str[i] != c) + return i; + } + return npos; + } + + size_t first_not_of(ro_substr chars, size_t start=0) const + { + C4_ASSERT((start >= 0 && start <= len) || (start == len && len == 0)); + for(size_t i = start; i < len; ++i) + { + bool gotit = true; + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars.str[j]) + { + gotit = false; + break; + } + } + if(gotit) + { + return i; + } + } + return npos; + } + + size_t last_not_of(ro_substr chars, size_t start=npos) const + { + C4_ASSERT(start == npos || (start >= 0 && start <= len)); + if(start == npos) + start = len; + for(size_t i = start-1; i != size_t(-1); --i) + { + bool gotit = true; + for(size_t j = 0; j < chars.len; ++j) + { + if(str[i] == chars.str[j]) + { + gotit = false; + break; + } + } + if(gotit) + { + return i; + } + } + return npos; + } + + /** @} */ + +public: + + /** @name Range lookup methods */ + /** @{ */ + + /** get the range delimited by an open-close pair of characters. + * @note There must be no nested pairs. + * @note No checks for escapes are performed. */ + basic_substring pair_range(CC open, CC close) const + { + size_t b = find(open); + if(b == npos) + return basic_substring(); + size_t e = find(close, b+1); + if(e == npos) + return basic_substring(); + basic_substring ret = range(b, e+1); + C4_ASSERT(ret.sub(1).find(open) == npos); + return ret; + } + + /** get the range delimited by a single open-close character (eg, quotes). + * @note The open-close character can be escaped. */ + basic_substring pair_range_esc(CC open_close, CC escape=CC('\\')) + { + size_t b = find(open_close); + if(b == npos) return basic_substring(); + for(size_t i = b+1; i < len; ++i) + { + CC c = str[i]; + if(c == open_close) + { + if(str[i-1] != escape) + { + return range(b, i+1); + } + } + } + return basic_substring(); + } + + /** get the range delimited by an open-close pair of characters, + * with possibly nested occurrences. No checks for escapes are + * performed. */ + basic_substring pair_range_nested(CC open, CC close) const + { + size_t b = find(open); + if(b == npos) return basic_substring(); + size_t e, curr = b+1, count = 0; + const char both[] = {open, close, '\0'}; + while((e = first_of(both, curr)) != npos) + { + if(str[e] == open) + { + ++count; + curr = e+1; + } + else if(str[e] == close) + { + if(count == 0) return range(b, e+1); + --count; + curr = e+1; + } + } + return basic_substring(); + } + + basic_substring unquoted() const + { + constexpr const C dq('"'), sq('\''); + if(len >= 2 && (str[len - 2] != C('\\')) && + ((begins_with(sq) && ends_with(sq)) + || + (begins_with(dq) && ends_with(dq)))) + { + return range(1, len -1); + } + return *this; + } + + /** @} */ + +public: + + /** @name Number-matching query methods */ + /** @{ */ + + /** @return true if the substring contents are a floating-point or integer number. + * @note any leading or trailing whitespace will return false. */ + bool is_number() const + { + if(empty() || (first_non_empty_span().empty())) + return false; + if(first_uint_span() == *this) + return true; + if(first_int_span() == *this) + return true; + if(first_real_span() == *this) + return true; + return false; + } + + /** @return true if the substring contents are a real number. + * @note any leading or trailing whitespace will return false. 
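+ * For example (illustrative only, assuming the array constructor above):
+ * @code
+ * c4::csubstr("3.14").is_real();  // true
+ * c4::csubstr(" 3.14").is_real(); // false: leading whitespace
+ * @endcode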
*/ + bool is_real() const + { + if(empty() || (first_non_empty_span().empty())) + return false; + if(first_real_span() == *this) + return true; + return false; + } + + /** @return true if the substring contents are an integer number. + * @note any leading or trailing whitespace will return false. */ + bool is_integer() const + { + if(empty() || (first_non_empty_span().empty())) + return false; + if(first_uint_span() == *this) + return true; + if(first_int_span() == *this) + return true; + return false; + } + + /** @return true if the substring contents are an unsigned integer number. + * @note any leading or trailing whitespace will return false. */ + bool is_unsigned_integer() const + { + if(empty() || (first_non_empty_span().empty())) + return false; + if(first_uint_span() == *this) + return true; + return false; + } + + /** get the first span consisting exclusively of non-empty characters */ + basic_substring first_non_empty_span() const + { + constexpr const ro_substr empty_chars(" \n\r\t"); + size_t pos = first_not_of(empty_chars); + if(pos == npos) + return first(0); + auto ret = sub(pos); + pos = ret.first_of(empty_chars); + return ret.first(pos); + } + + /** get the first span which can be interpreted as an unsigned integer */ + basic_substring first_uint_span() const + { + basic_substring ne = first_non_empty_span(); + if(ne.empty()) + return ne; + if(ne.str[0] == '-') + return first(0); + size_t skip_start = (ne.str[0] == '+') ? 1 : 0; + return ne._first_integral_span(skip_start); + } + + /** get the first span which can be interpreted as a signed integer */ + basic_substring first_int_span() const + { + basic_substring ne = first_non_empty_span(); + if(ne.empty()) + return ne; + size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-') ? 1 : 0; + return ne._first_integral_span(skip_start); + } + + basic_substring _first_integral_span(size_t skip_start) const + { + C4_ASSERT(!empty()); + if(skip_start == len) + return first(0); + C4_ASSERT(skip_start < len); + if(len >= skip_start + 3) + { + if(str[skip_start] != '0') + { + for(size_t i = skip_start; i < len; ++i) + { + char c = str[i]; + if(c < '0' || c > '9') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + } + else + { + char next = str[skip_start + 1]; + if(next == 'x' || next == 'X') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if( ! _is_hex_char(c)) + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } + else if(next == 'b' || next == 'B') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if(c != '0' && c != '1') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } + else if(next == 'o' || next == 'O') + { + skip_start += 2; + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if(c < '0' || c > '7') + return i > skip_start && _is_delim_char(c) ? first(i) : first(0); + } + return *this; + } + } + } + // must be a decimal, or it is not a an number + for(size_t i = skip_start; i < len; ++i) + { + const char c = str[i]; + if(c < '0' || c > '9') + return i > skip_start && _is_delim_char(c) ? 
first(i) : first(0); + } + return *this; + } + + /** get the first span which can be interpreted as a real (floating-point) number */ + basic_substring first_real_span() const + { + basic_substring ne = first_non_empty_span(); + if(ne.empty()) + return ne; + size_t skip_start = (ne.str[0] == '+' || ne.str[0] == '-'); + C4_ASSERT(skip_start == 0 || skip_start == 1); + // if we have at least three digits after the leading sign, it + // can be decimal, or hex, or bin or oct. Ex: + // non-decimal: 0x0, 0b0, 0o0 + // decimal: 1.0, 10., 1e1, 100, inf, nan, infinity + if(ne.len >= skip_start+3) + { + // if it does not have leading 0, it must be decimal, or it is not a real + if(ne.str[skip_start] != '0') + { + if(ne.str[skip_start] == 'i') // is it infinity or inf? + { + basic_substring word = ne._word_follows(skip_start + 1, "nfinity"); + if(word.len) + return word; + return ne._word_follows(skip_start + 1, "nf"); + } + else if(ne.str[skip_start] == 'n') // is it nan? + { + return ne._word_follows(skip_start + 1, "an"); + } + else // must be a decimal, or it is not a real + { + return ne._first_real_span_dec(skip_start); + } + } + else // starts with 0. is it 0x, 0b or 0o? + { + const char next = ne.str[skip_start + 1]; + // hexadecimal + if(next == 'x' || next == 'X') + return ne._first_real_span_hex(skip_start + 2); + // binary + else if(next == 'b' || next == 'B') + return ne._first_real_span_bin(skip_start + 2); + // octal + else if(next == 'o' || next == 'O') + return ne._first_real_span_oct(skip_start + 2); + // none of the above. may still be a decimal. + else + return ne._first_real_span_dec(skip_start); // do not skip the 0. + } + } + // less than 3 chars after the leading sign. It is either a + // decimal or it is not a real. (cannot be any of 0x0, etc). + return ne._first_real_span_dec(skip_start); + } + + /** true if the character is a delimiter character *at the end* */ + static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_delim_char(char c) noexcept + { + return c == ' ' || c == '\n' + || c == ']' || c == ')' || c == '}' + || c == ',' || c == ';' || c == '\r' || c == '\t' || c == '\0'; + } + + /** true if the character is in [0-9a-fA-F] */ + static constexpr C4_ALWAYS_INLINE C4_CONST bool _is_hex_char(char c) noexcept + { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + + C4_NO_INLINE C4_PURE basic_substring _word_follows(size_t pos, csubstr word) const noexcept + { + size_t posend = pos + word.len; + if(len >= posend && sub(pos, word.len) == word) + if(len == posend || _is_delim_char(str[posend])) + return first(posend); + return first(0); + } + + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_dec(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_dec; + } + else if(c == 'e' || c == 'E') + { + ++pos; + goto power_part_dec; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? 
+ *this : + first(0); + fractional_part_dec: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + { + fracchars = true; + } + else if(c == 'e' || c == 'E') + { + ++pos; + goto power_part_dec; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_dec: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'e' || str[pos - 1] == 'E'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_hex(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(_is_hex_char(c)) + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_hex; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_hex; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_hex: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(_is_hex_char(c)) + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_hex; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_hex: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... 
so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_bin(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c == '0' || c == '1') + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_bin; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_bin; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_bin: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c == '0' || c == '1') + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_bin; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? + *this : + first(0); + power_part_bin: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + // this function is declared inside the class to avoid a VS error with __declspec(dllimport) + C4_NO_INLINE C4_PURE basic_substring _first_real_span_oct(size_t pos) const noexcept + { + bool intchars = false; + bool fracchars = false; + bool powchars; + // integral part + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '7') + { + intchars = true; + } + else if(c == '.') + { + ++pos; + goto fractional_part_oct; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_oct; + } + else if(_is_delim_char(c)) + { + return intchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + // no . or p were found; this is either an integral number + // or not a number at all + return intchars ? + *this : + first(0); + fractional_part_oct: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == '.'); + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '7') + { + fracchars = true; + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power_part_oct; + } + else if(_is_delim_char(c)) + { + return intchars || fracchars ? first(pos) : first(0); + } + else + { + return first(0); + } + } + return intchars || fracchars ? 
+ *this : + first(0); + power_part_oct: + C4_ASSERT(pos > 0); + C4_ASSERT(str[pos - 1] == 'p' || str[pos - 1] == 'P'); + // either a + or a - is expected here, followed by more chars. + // also, using (pos+1) in this check will cause an early + // return when no more chars follow the sign. + if(len <= (pos+1) || (str[pos] != '+' && str[pos] != '-') || ((!intchars) && (!fracchars))) + return first(0); + ++pos; // this was the sign. + // ... so the (pos+1) ensures that we enter the loop and + // hence that there exist chars in the power part + powchars = false; + for( ; pos < len; ++pos) + { + const char c = str[pos]; + if(c >= '0' && c <= '9') + powchars = true; + else if(powchars && _is_delim_char(c)) + return first(pos); + else + return first(0); + } + return *this; + } + + /** @} */ + +public: + + /** @name Splitting methods */ + /** @{ */ + + /** returns true if the string has not been exhausted yet, meaning + * it's ok to call next_split() again. When no instance of sep + * exists in the string, returns the full string. When the input + * is an empty string, the output string is the empty string. */ + bool next_split(C sep, size_t *C4_RESTRICT start_pos, basic_substring *C4_RESTRICT out) const + { + if(C4_LIKELY(*start_pos < len)) + { + for(size_t i = *start_pos, e = len; i < e; i++) + { + if(str[i] == sep) + { + out->assign(str + *start_pos, i - *start_pos); + *start_pos = i+1; + return true; + } + } + out->assign(str + *start_pos, len - *start_pos); + *start_pos = len + 1; + return true; + } + else + { + bool valid = len > 0 && (*start_pos == len); + if(valid && !empty() && str[len-1] == sep) + { + out->assign(str + len, (size_t)0); // the cast is needed to prevent overload ambiguity + } + else + { + out->assign(str + len + 1, (size_t)0); // the cast is needed to prevent overload ambiguity + } + *start_pos = len + 1; + return valid; + } + } + +private: + + struct split_proxy_impl + { + struct split_iterator_impl + { + split_proxy_impl const* m_proxy; + basic_substring m_str; + size_t m_pos; + NCC_ m_sep; + + split_iterator_impl(split_proxy_impl const* proxy, size_t pos, C sep) + : m_proxy(proxy), m_pos(pos), m_sep(sep) + { + _tick(); + } + + void _tick() + { + m_proxy->m_str.next_split(m_sep, &m_pos, &m_str); + } + + split_iterator_impl& operator++ () { _tick(); return *this; } + split_iterator_impl operator++ (int) { split_iterator_impl it = *this; _tick(); return it; } + + basic_substring& operator* () { return m_str; } + basic_substring* operator-> () { return &m_str; } + + bool operator!= (split_iterator_impl const& that) const + { + return !(this->operator==(that)); + } + bool operator== (split_iterator_impl const& that) const + { + C4_XASSERT((m_sep == that.m_sep) && "cannot compare split iterators with different separators"); + if(m_str.size() != that.m_str.size()) + return false; + if(m_str.data() != that.m_str.data()) + return false; + return m_pos == that.m_pos; + } + }; + + basic_substring m_str; + size_t m_start_pos; + C m_sep; + + split_proxy_impl(basic_substring str_, size_t start_pos, C sep) + : m_str(str_), m_start_pos(start_pos), m_sep(sep) + { + } + + split_iterator_impl begin() const + { + auto it = split_iterator_impl(this, m_start_pos, m_sep); + return it; + } + split_iterator_impl end() const + { + size_t pos = m_str.size() + 1; + auto it = split_iterator_impl(this, pos, m_sep); + return it; + } + }; + +public: + + using split_proxy = split_proxy_impl; + + /** a view into the splits */ + split_proxy split(C sep, size_t start_pos=0) const + { + 
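+ // Illustrative usage sketch (an addition for clarity, not from the original
+ // sources): iterate the fields of a separated string. Empty fields between
+ // consecutive separators are visited as empty substrings.
+ //
+ //   csubstr line = "a,b,,c";
+ //   for(csubstr field : line.split(','))
+ //       printf("[%.*s]", (int)field.len, field.str);  // prints [a][b][][c]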
C4_XASSERT((start_pos >= 0 && start_pos < len) || empty()); + auto ss = sub(0, len); + auto it = split_proxy(ss, start_pos, sep); + return it; + } + +public: + + /** pop right: return the first split from the right. Use + * gpop_left() to get the reciprocal part. + */ + basic_substring pop_right(C sep=C('/'), bool skip_empty=false) const + { + if(C4_LIKELY(len > 1)) + { + auto pos = last_of(sep); + if(pos != npos) + { + if(pos + 1 < len) // does not end with sep + { + return sub(pos + 1); // return from sep to end + } + else // the string ends with sep + { + if( ! skip_empty) + { + return sub(pos + 1, 0); + } + auto ppos = last_not_of(sep); // skip repeated seps + if(ppos == npos) // the string is all made of seps + { + return sub(0, 0); + } + // find the previous sep + auto pos0 = last_of(sep, ppos); + if(pos0 == npos) // only the last sep exists + { + return sub(0); // return the full string (because skip_empty is true) + } + ++pos0; + return sub(pos0); + } + } + else // no sep was found, return the full string + { + return *this; + } + } + else if(len == 1) + { + if(begins_with(sep)) + { + return sub(0, 0); + } + return *this; + } + else // an empty string + { + return basic_substring(); + } + } + + /** return the first split from the left. Use gpop_right() to get + * the reciprocal part. */ + basic_substring pop_left(C sep = C('/'), bool skip_empty=false) const + { + if(C4_LIKELY(len > 1)) + { + auto pos = first_of(sep); + if(pos != npos) + { + if(pos > 0) // does not start with sep + { + return sub(0, pos); // return everything up to it + } + else // the string starts with sep + { + if( ! skip_empty) + { + return sub(0, 0); + } + auto ppos = first_not_of(sep); // skip repeated seps + if(ppos == npos) // the string is all made of seps + { + return sub(0, 0); + } + // find the next sep + auto pos0 = first_of(sep, ppos); + if(pos0 == npos) // only the first sep exists + { + return sub(0); // return the full string (because skip_empty is true) + } + C4_XASSERT(pos0 > 0); + // return everything up to the second sep + return sub(0, pos0); + } + } + else // no sep was found, return the full string + { + return sub(0); + } + } + else if(len == 1) + { + if(begins_with(sep)) + { + return sub(0, 0); + } + return sub(0); + } + else // an empty string + { + return basic_substring(); + } + } + +public: + + /** greedy pop left. eg, csubstr("a/b/c").gpop_left('/')="c" */ + basic_substring gpop_left(C sep = C('/'), bool skip_empty=false) const + { + auto ss = pop_right(sep, skip_empty); + ss = left_of(ss); + if(ss.find(sep) != npos) + { + if(ss.ends_with(sep)) + { + if(skip_empty) + { + ss = ss.trimr(sep); + } + else + { + ss = ss.sub(0, ss.len-1); // safe to subtract because ends_with(sep) is true + } + } + } + return ss; + } + + /** greedy pop right. eg, csubstr("a/b/c").gpop_right('/')="a" */ + basic_substring gpop_right(C sep = C('/'), bool skip_empty=false) const + { + auto ss = pop_left(sep, skip_empty); + ss = right_of(ss); + if(ss.find(sep) != npos) + { + if(ss.begins_with(sep)) + { + if(skip_empty) + { + ss = ss.triml(sep); + } + else + { + ss = ss.sub(1); + } + } + } + return ss; + } + + /** @} */ + +public: + + /** @name Path-like manipulation methods */ + /** @{ */ + + basic_substring basename(C sep=C('/')) const + { + auto ss = pop_right(sep, /*skip_empty*/true); + ss = ss.trimr(sep); + return ss; + } + + basic_substring dirname(C sep=C('/')) const + { + auto ss = basename(sep); + ss = ss.empty() ? 
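+ // Illustrative examples for the pop/path helpers above (an addition for
+ // clarity; the expected results are assumptions read off the code, not
+ // taken from upstream documentation):
+ //
+ //   csubstr("a/b/c").pop_right('/') == "c"     // last path component
+ //   csubstr("a/b/c").pop_left('/')  == "a"     // first path component
+ //   csubstr("a/b/c").basename()     == "c"
+ //   csubstr("a/b/c").dirname()      == "a/b/"  // keeps the trailing separator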
*this : left_of(ss); + return ss; + } + + C4_ALWAYS_INLINE basic_substring name_wo_extshort() const + { + return gpop_left('.'); + } + + C4_ALWAYS_INLINE basic_substring name_wo_extlong() const + { + return pop_left('.'); + } + + C4_ALWAYS_INLINE basic_substring extshort() const + { + return pop_right('.'); + } + + C4_ALWAYS_INLINE basic_substring extlong() const + { + return gpop_right('.'); + } + + /** @} */ + +public: + + /** @name Content-modification methods (only for non-const C) */ + /** @{ */ + + /** convert the string to upper-case + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) toupper() + { + for(size_t i = 0; i < len; ++i) + { + str[i] = static_cast(::toupper(str[i])); + } + } + + /** convert the string to lower-case + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) tolower() + { + for(size_t i = 0; i < len; ++i) + { + str[i] = static_cast(::tolower(str[i])); + } + } + +public: + + /** fill the entire contents with the given @p val + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) fill(C val) + { + for(size_t i = 0; i < len; ++i) + { + str[i] = val; + } + } + +public: + + /** set the current substring to a copy of the given csubstr + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) copy_from(ro_substr that, size_t ifirst=0, size_t num=npos) + { + C4_ASSERT(ifirst >= 0 && ifirst <= len); + num = num != npos ? num : len - ifirst; + num = num < that.len ? num : that.len; + C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(num) + memcpy(str + sizeof(C) * ifirst, that.str, sizeof(C) * num); + } + +public: + + /** reverse in place + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) reverse() + { + if(len == 0) return; + detail::_do_reverse(str, str + len - 1); + } + + /** revert a subpart in place + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) reverse_sub(size_t ifirst, size_t num) + { + C4_ASSERT(ifirst >= 0 && ifirst <= len); + C4_ASSERT(ifirst + num >= 0 && ifirst + num <= len); + if(num == 0) return; + detail::_do_reverse(str + ifirst, str + ifirst + num - 1); + } + + /** revert a range in place + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(void) reverse_range(size_t ifirst, size_t ilast) + { + C4_ASSERT(ifirst >= 0 && ifirst <= len); + C4_ASSERT(ilast >= 0 && ilast <= len); + if(ifirst == ilast) return; + detail::_do_reverse(str + ifirst, str + ilast - 1); + } + +public: + + /** erase part of the string. 
eg, with char s[] = "0123456789", + * substr(s).erase(3, 2) = "01256789", and s is now "01245678989" + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(basic_substring) erase(size_t pos, size_t num) + { + C4_ASSERT(pos >= 0 && pos+num <= len); + size_t num_to_move = len - pos - num; + memmove(str + pos, str + pos + num, sizeof(C) * num_to_move); + return basic_substring{str, len - num}; + } + + /** @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(basic_substring) erase_range(size_t first, size_t last) + { + C4_ASSERT(first <= last); + return erase(first, static_cast(last-first)); + } + + /** erase a part of the string. + * @note @p sub must be a substring of this string + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(basic_substring) erase(ro_substr sub) + { + C4_ASSERT(is_super(sub)); + C4_ASSERT(sub.str >= str); + return erase(static_cast(sub.str - str), sub.len); + } + +public: + + /** replace every occurrence of character @p value with the character @p repl + * @return the number of characters that were replaced + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(size_t) replace(C value, C repl, size_t pos=0) + { + C4_ASSERT((pos >= 0 && pos <= len) || pos == npos); + size_t did_it = 0; + while((pos = find(value, pos)) != npos) + { + str[pos++] = repl; + ++did_it; + } + return did_it; + } + + /** replace every occurrence of each character in @p value with + * the character @p repl. + * @return the number of characters that were replaced + * @note this method requires that the string memory is writeable and is SFINAEd out for const C */ + C4_REQUIRE_RW(size_t) replace(ro_substr chars, C repl, size_t pos=0) + { + C4_ASSERT((pos >= 0 && pos <= len) || pos == npos); + size_t did_it = 0; + while((pos = first_of(chars, pos)) != npos) + { + str[pos++] = repl; + ++did_it; + } + return did_it; + } + + /** replace @p pattern with @p repl, and write the result into + * @dst. pattern and repl don't need equal sizes. + * + * @return the required size for dst. No overflow occurs if + * dst.len is smaller than the required size; this can be used to + * determine the required size for an existing container. */ + size_t replace_all(rw_substr dst, ro_substr pattern, ro_substr repl, size_t pos=0) const + { + C4_ASSERT( ! pattern.empty()); //!< @todo relax this precondition + C4_ASSERT( ! this ->overlaps(dst)); //!< @todo relax this precondition + C4_ASSERT( ! pattern.overlaps(dst)); + C4_ASSERT( ! 
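+ // Illustrative two-pass usage sketch (an addition, not from the original
+ // sources): because the returned value is the required size even when dst is
+ // too small, a first call can measure and a second call can write.
+ //
+ //   csubstr s = "x+y+z";
+ //   size_t needed = s.replace_all(substr{}, "+", " plus ");  // == 15, nothing is written
+ //   char buf[16];
+ //   s.replace_all(substr(buf, needed), "+", " plus ");       // buf now holds "x plus y plus z"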
repl .overlaps(dst)); + C4_ASSERT((pos >= 0 && pos <= len) || pos == npos); + C4_SUPPRESS_WARNING_GCC_PUSH + C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc11 has a false positive here + #if (!defined(__clang__)) && (defined(__GNUC__) && (__GNUC__ >= 7)) + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc11 has a false positive here + #endif + #define _c4append(first, last) \ + { \ + C4_ASSERT((last) >= (first)); \ + size_t num = static_cast((last) - (first)); \ + if(num > 0 && sz + num <= dst.len) \ + { \ + memcpy(dst.str + sz, first, num * sizeof(C)); \ + } \ + sz += num; \ + } + size_t sz = 0; + size_t b = pos; + _c4append(str, str + pos); + do { + size_t e = find(pattern, b); + if(e == npos) + { + _c4append(str + b, str + len); + break; + } + _c4append(str + b, str + e); + _c4append(repl.begin(), repl.end()); + b = e + pattern.size(); + } while(b < len && b != npos); + return sz; + #undef _c4append + C4_SUPPRESS_WARNING_GCC_POP + } + + /** @} */ + +}; // template class basic_substring + + +#undef C4_REQUIRE_RW +#undef C4_REQUIRE_RO +#undef C4_NC2C + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** Because of a C++ limitation, substr cannot provide simultaneous + * overloads for constructing from a char[N] and a char*; the latter + * will always be chosen by the compiler. So this specialization is + * provided to simplify obtaining a substr from a char*. Being a + * function has the advantage of highlighting the strlen() cost. + * + * @see to_csubstr + * @see For a more detailed explanation on why the overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ +inline substr to_substr(char *s) +{ + return substr(s, s ? strlen(s) : 0); +} + +/** Because of a C++ limitation, substr cannot provide simultaneous + * overloads for constructing from a char[N] and a char*; the latter + * will always be chosen by the compiler. So this specialization is + * provided to simplify obtaining a substr from a char*. Being a + * function has the advantage of highlighting the strlen() cost. + * + * @see to_substr + * @see For a more detailed explanation on why the overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ +inline csubstr to_csubstr(char *s) +{ + return csubstr(s, s ? strlen(s) : 0); +} + +/** Because of a C++ limitation, substr cannot provide simultaneous + * overloads for constructing from a const char[N] and a const char*; + * the latter will always be chosen by the compiler. So this + * specialization is provided to simplify obtaining a substr from a + * char*. Being a function has the advantage of highlighting the + * strlen() cost. + * + * @overload to_csubstr + * @see to_substr + * @see For a more detailed explanation on why the overloads cannot + * coexist, see http://cplusplus.bordoon.com/specializeForCharacterArrays.html */ +inline csubstr to_csubstr(const char *s) +{ + return csubstr(s, s ? 
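+// Illustrative sketch (an addition, not from the original sources): the
+// to_csubstr()/to_substr() overloads let generic code accept plain C strings
+// and substrings uniformly, paying the strlen() only where it is needed.
+//
+//   template<class T>
+//   void log_value(T const& v) { csubstr s = to_csubstr(v); fwrite(s.str, 1, s.len, stdout); }
+//
+//   log_value("some literal");      // string literal works too
+//   log_value(some_char_pointer);   // char* -> strlen() happens here (hypothetical variable)
+//   log_value(existing_csubstr);    // csubstr passes through unchanged (hypothetical variable)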
strlen(s) : 0); +} + + +/** neutral version for use in generic code */ +inline csubstr to_csubstr(csubstr s) +{ + return s; +} + +/** neutral version for use in generic code */ +inline csubstr to_csubstr(substr s) +{ + return s; +} + +/** neutral version for use in generic code */ +inline substr to_substr(substr s) +{ + return s; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template inline bool operator== (const C (&s)[N], basic_substring const that) { return that.compare(s) == 0; } +template inline bool operator!= (const C (&s)[N], basic_substring const that) { return that.compare(s) != 0; } +template inline bool operator< (const C (&s)[N], basic_substring const that) { return that.compare(s) > 0; } +template inline bool operator> (const C (&s)[N], basic_substring const that) { return that.compare(s) < 0; } +template inline bool operator<= (const C (&s)[N], basic_substring const that) { return that.compare(s) >= 0; } +template inline bool operator>= (const C (&s)[N], basic_substring const that) { return that.compare(s) <= 0; } + +template inline bool operator== (C const c, basic_substring const that) { return that.compare(c) == 0; } +template inline bool operator!= (C const c, basic_substring const that) { return that.compare(c) != 0; } +template inline bool operator< (C const c, basic_substring const that) { return that.compare(c) > 0; } +template inline bool operator> (C const c, basic_substring const that) { return that.compare(c) < 0; } +template inline bool operator<= (C const c, basic_substring const that) { return that.compare(c) >= 0; } +template inline bool operator>= (C const c, basic_substring const that) { return that.compare(c) <= 0; } + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @define C4_SUBSTR_NO_OSTREAM_LSHIFT doctest does not deal well with + * template operator<< + * @see https://github.com/onqtam/doctest/pull/431 */ +#ifndef C4_SUBSTR_NO_OSTREAM_LSHIFT +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wsign-conversion" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + +/** output the string to a stream */ +template +inline OStream& operator<< (OStream& os, basic_substring s) +{ + os.write(s.str, s.len); + return os; +} + +// this causes ambiguity +///** this is used by google test */ +//template +//inline void PrintTo(basic_substring s, OStream* os) +//{ +// os->write(s.str, s.len); +//} + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif +#endif // !C4_SUBSTR_NO_OSTREAM_LSHIFT + +} // namespace c4 + + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_SUBSTR_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/substr.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/ext/fast_float.hpp +// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp 
+//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_EXT_FAST_FLOAT_HPP_ +#define _C4_EXT_FAST_FLOAT_HPP_ + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe +#elif defined(__clang__) || defined(__APPLE_CC__) || defined(_LIBCPP_VERSION) +# pragma clang diagnostic push +# if (defined(__clang_major__) && _clang_major__ >= 9) || defined(__APPLE_CC__) +# pragma clang diagnostic ignored "-Wfortify-source" +# endif +# pragma clang diagnostic ignored "-Wshift-count-overflow" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif + +// fast_float by Daniel Lemire +// fast_float by João Paulo Magalhaes + + +// with contributions from Eugene Golushkov +// with contributions from Maksim Kita +// with contributions from Marcin Wojdyr +// with contributions from Neal Richardson +// with contributions from Tim Paine +// with contributions from Fabio Pellacini + + +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + + +#ifndef FASTFLOAT_FAST_FLOAT_H +#define FASTFLOAT_FAST_FLOAT_H + +#include + +namespace fast_float { +enum chars_format { + scientific = 1<<0, + fixed = 1<<2, + hex = 1<<3, + general = fixed | scientific +}; + + +struct from_chars_result { + const char *ptr; + std::errc ec; +}; + +struct parse_options { + constexpr explicit parse_options(chars_format fmt = chars_format::general, + char dot = '.') + : format(fmt), decimal_point(dot) {} + + /** Which number formats are accepted */ + chars_format format; + /** The character used as decimal point */ + char decimal_point; +}; + +/** + * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting + * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. + * The resulting floating-point value is the closest floating-point values (using either float or double), + * using the "round to even" convention for values that would otherwise fall right in-between two values. + * That is, we provide exact parsing according to the IEEE standard. + * + * Given a successful parse, the pointer (`ptr`) in the returned value is set to point right after the + * parsed number, and the `value` referenced is set to the parsed value. 
In case of error, the returned + * `ec` contains a representative error, otherwise the default (`std::errc()`) value is stored. + * + * The implementation does not throw and does not allocate memory (e.g., with `new` or `malloc`). + * + * Like the C++17 standard, the `fast_float::from_chars` functions take an optional last argument of + * the type `fast_float::chars_format`. It is a bitset value: we check whether + * `fmt & fast_float::chars_format::fixed` and `fmt & fast_float::chars_format::scientific` are set + * to determine whether we allowe the fixed point and scientific notation respectively. + * The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`. + */ +template +from_chars_result from_chars(const char *first, const char *last, + T &value, chars_format fmt = chars_format::general) noexcept; + +/** + * Like from_chars, but accepts an `options` argument to govern number parsing. + */ +template +from_chars_result from_chars_advanced(const char *first, const char *last, + T &value, parse_options options) noexcept; + +} +#endif // FASTFLOAT_FAST_FLOAT_H + + +#ifndef FASTFLOAT_FLOAT_COMMON_H +#define FASTFLOAT_FLOAT_COMMON_H + +#include +//included above: +//#include +#include +//included above: +//#include + +#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) \ + || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ + || defined(__MINGW64__) \ + || defined(__s390x__) \ + || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \ + || defined(__EMSCRIPTEN__)) +#define FASTFLOAT_64BIT +#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ + || defined(__arm__) || defined(_M_ARM) \ + || defined(__MINGW32__)) +#define FASTFLOAT_32BIT +#else + // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. + // We can never tell the register width, but the SIZE_MAX is a good approximation. + // UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max portability. + #if SIZE_MAX == 0xffff + #error Unknown platform (16-bit, unsupported) + #elif SIZE_MAX == 0xffffffff + #define FASTFLOAT_32BIT + #elif SIZE_MAX == 0xffffffffffffffff + #define FASTFLOAT_64BIT + #else + #error Unknown platform (not 32-bit, not 64-bit?) 
+ #endif +#endif + +#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) +//included above: +//#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define FASTFLOAT_VISUAL_STUDIO 1 +#endif + +#ifdef _WIN32 +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#else +#include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#define FASTFLOAT_IS_BIG_ENDIAN 1 +#endif +#endif + +#ifdef FASTFLOAT_VISUAL_STUDIO +#define fastfloat_really_inline __forceinline +#else +#define fastfloat_really_inline inline __attribute__((always_inline)) +#endif + +#ifndef FASTFLOAT_ASSERT +#define FASTFLOAT_ASSERT(x) { if (!(x)) abort(); } +#endif + +#ifndef FASTFLOAT_DEBUG_ASSERT +//included above: +//#include +#define FASTFLOAT_DEBUG_ASSERT(x) assert(x) +#endif + +// rust style `try!()` macro, or `?` operator +#define FASTFLOAT_TRY(x) { if (!(x)) return false; } + +namespace fast_float { + +// Compares two ASCII strings in a case insensitive manner. +inline bool fastfloat_strncasecmp(const char *input1, const char *input2, + size_t length) { + char running_diff{0}; + for (size_t i = 0; i < length; i++) { + running_diff |= (input1[i] ^ input2[i]); + } + return (running_diff == 0) || (running_diff == 32); +} + +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif + +// a pointer and a length to a contiguous block of memory +template +struct span { + const T* ptr; + size_t length; + span(const T* _ptr, size_t _length) : ptr(_ptr), length(_length) {} + span() : ptr(nullptr), length(0) {} + + constexpr size_t len() const noexcept { + return length; + } + + const T& operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return ptr[index]; + } +}; + +struct value128 { + uint64_t low; + uint64_t high; + value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + value128() : low(0), high(0) {} +}; + +/* result might be undefined when input_num is zero */ +fastfloat_really_inline int leading_zeroes(uint64_t input_num) { + assert(input_num > 0); +#ifdef FASTFLOAT_VISUAL_STUDIO + #if defined(_M_X64) || defined(_M_ARM64) + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ _BitScanReverse64(&leading_zero, input_num); + return (int)(63 - leading_zero); + #else + int last_bit = 0; + if(input_num & uint64_t(0xffffffff00000000)) input_num >>= 32, last_bit |= 32; + if(input_num & uint64_t( 0xffff0000)) input_num >>= 16, last_bit |= 16; + if(input_num & uint64_t( 0xff00)) input_num >>= 8, last_bit |= 8; + if(input_num & uint64_t( 0xf0)) input_num >>= 4, last_bit |= 4; + if(input_num & uint64_t( 0xc)) input_num >>= 2, last_bit |= 2; + if(input_num & uint64_t( 0x2)) input_num >>= 1, last_bit |= 1; + return 63 - last_bit; + #endif +#else + return __builtin_clzll(input_num); +#endif +} + +#ifdef FASTFLOAT_32BIT + +// slow emulation routine for 32-bit +fastfloat_really_inline uint64_t emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline uint64_t _umul128(uint64_t ab, uint64_t cd, + uint64_t *hi) { + uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif // !__MINGW64__ + +#endif // FASTFLOAT_32BIT + + +// compute 64-bit a*b +fastfloat_really_inline value128 full_multiplication(uint64_t a, + uint64_t b) { + value128 answer; +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emulate + answer.high = __umulh(a, b); + answer.low = a * b; +#elif defined(FASTFLOAT_32BIT) || (defined(_WIN64) && !defined(__clang__)) + answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 +#elif defined(FASTFLOAT_64BIT) + __uint128_t r = ((__uint128_t)a) * b; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#else + #error Not implemented +#endif + return answer; +} + +struct adjusted_mantissa { + uint64_t mantissa{0}; + int32_t power2{0}; // a negative value indicates an invalid result + adjusted_mantissa() = default; + bool operator==(const adjusted_mantissa &o) const { + return mantissa == o.mantissa && power2 == o.power2; + } + bool operator!=(const adjusted_mantissa &o) const { + return mantissa != o.mantissa || power2 != o.power2; + } +}; + +// Bias so we can get the real exponent with an invalid adjusted_mantissa. 
+constexpr static int32_t invalid_am_bias = -0x8000; + +constexpr static double powers_of_ten_double[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; +constexpr static float powers_of_ten_float[] = {1e0, 1e1, 1e2, 1e3, 1e4, 1e5, + 1e6, 1e7, 1e8, 1e9, 1e10}; + +template struct binary_format { + static inline constexpr int mantissa_explicit_bits(); + static inline constexpr int minimum_exponent(); + static inline constexpr int infinite_power(); + static inline constexpr int sign_index(); + static inline constexpr int min_exponent_fast_path(); + static inline constexpr int max_exponent_fast_path(); + static inline constexpr int max_exponent_round_to_even(); + static inline constexpr int min_exponent_round_to_even(); + static inline constexpr uint64_t max_mantissa_fast_path(); + static inline constexpr int largest_power_of_ten(); + static inline constexpr int smallest_power_of_ten(); + static inline constexpr T exact_power_of_ten(int64_t power); + static inline constexpr size_t max_digits(); +}; + +template <> inline constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} +template <> inline constexpr int binary_format::mantissa_explicit_bits() { + return 23; +} + +template <> inline constexpr int binary_format::max_exponent_round_to_even() { + return 23; +} + +template <> inline constexpr int binary_format::max_exponent_round_to_even() { + return 10; +} + +template <> inline constexpr int binary_format::min_exponent_round_to_even() { + return -4; +} + +template <> inline constexpr int binary_format::min_exponent_round_to_even() { + return -17; +} + +template <> inline constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> inline constexpr int binary_format::minimum_exponent() { + return -127; +} + +template <> inline constexpr int binary_format::infinite_power() { + return 0x7FF; +} +template <> inline constexpr int binary_format::infinite_power() { + return 0xFF; +} + +template <> inline constexpr int binary_format::sign_index() { return 63; } +template <> inline constexpr int binary_format::sign_index() { return 31; } + +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} +template <> inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + +template <> inline constexpr int binary_format::max_exponent_fast_path() { + return 22; +} +template <> inline constexpr int binary_format::max_exponent_fast_path() { + return 10; +} + +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} +template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} + +template <> +inline constexpr double binary_format::exact_power_of_ten(int64_t power) { + return powers_of_ten_double[power]; +} +template <> +inline constexpr float binary_format::exact_power_of_ten(int64_t power) { + + return powers_of_ten_float[power]; +} + + +template <> +inline constexpr int binary_format::largest_power_of_ten() { + return 308; +} +template <> +inline constexpr int binary_format::largest_power_of_ten() { + return 38; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -342; +} +template <> 
+inline constexpr int binary_format::smallest_power_of_ten() { + return -65; +} + +template <> inline constexpr size_t binary_format::max_digits() { + return 769; +} +template <> inline constexpr size_t binary_format::max_digits() { + return 114; +} + +template +fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) { + uint64_t word = am.mantissa; + word |= uint64_t(am.power2) << binary_format::mantissa_explicit_bits(); + word = negative + ? word | (uint64_t(1) << binary_format::sign_index()) : word; +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + if (std::is_same::value) { + ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian + } else { + ::memcpy(&value, &word, sizeof(T)); + } +#else + // For little-endian systems: + ::memcpy(&value, &word, sizeof(T)); +#endif +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_ASCII_NUMBER_H +#define FASTFLOAT_ASCII_NUMBER_H + +//included above: +//#include +//included above: +//#include +//included above: +//#include +#include + + +namespace fast_float { + +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. +fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } + +fastfloat_really_inline uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 + | (val & 0x00FF000000000000) >> 40 + | (val & 0x0000FF0000000000) >> 24 + | (val & 0x000000FF00000000) >> 8 + | (val & 0x00000000FF000000) << 8 + | (val & 0x0000000000FF0000) << 24 + | (val & 0x000000000000FF00) << 40 + | (val & 0x00000000000000FF) << 56; +} + +fastfloat_really_inline uint64_t read_u64(const char *chars) { + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + return val; +} + +fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. 
+ val = byteswap(val); +#endif + ::memcpy(chars, &val, sizeof(uint64_t)); +} + +// credit @aqrit +fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + +fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { + return parse_eight_digits_unrolled(read_u64(chars)); +} + +// credit @aqrit +fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); +} + +fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { + return is_made_of_eight_digits_fast(read_u64(chars)); +} + +typedef span byte_span; + +struct parsed_number_string { + int64_t exponent{0}; + uint64_t mantissa{0}; + const char *lastmatch{nullptr}; + bool negative{false}; + bool valid{false}; + bool too_many_digits{false}; + // contains the range of the significant digits + byte_span integer{}; // non-nullable + byte_span fraction{}; // nullable +}; + +// Assuming that you use no more than 19 digits, this will +// parse an ASCII string. +fastfloat_really_inline +parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { + const chars_format fmt = options.format; + const char decimal_point = options.decimal_point; + + parsed_number_string answer; + answer.valid = false; + answer.too_many_digits = false; + answer.negative = (*p == '-'); + if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + ++p; + if (p == pend) { + return answer; + } + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } + } + const char *const start_digits = p; + + uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + p += 8; + } + while ((p != pend) && is_integer(*p)) { + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + + uint64_t(*p - '0'); // might overflow, we will handle the overflow later + ++p; + } + const char *const end_of_integer_part = p; + int64_t digit_count = int64_t(end_of_integer_part - start_digits); + answer.integer = byte_span(start_digits, size_t(digit_count)); + int64_t exponent = 0; + if ((p != pend) && (*p == decimal_point)) { + ++p; + const char* before = p; + // can occur at most twice without overflowing, but let it occur more, since + // for integers with many digits, digit parsing is the primary bottleneck. 
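+  // Illustrative note (an addition, not from the original sources): each pass
+  // of the loop below consumes eight ASCII digits at once with the SWAR helper
+  // above; e.g. for the chars "12345678" it yields the integer 12345678, so
+  // the accumulator advances as i = i * 100000000 + 12345678 in one step.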
+ while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + p += 8; + } + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + } + exponent = before - p; + answer.fraction = byte_span(before, size_t(p - before)); + digit_count -= exponent; + } + // we must have encountered at least one integer! + if (digit_count == 0) { + return answer; + } + int64_t exp_number = 0; // explicit exponential part + if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { + const char * location_of_e = p; + ++p; + bool neg_exp = false; + if ((p != pend) && ('-' == *p)) { + neg_exp = true; + ++p; + } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + ++p; + } + if ((p == pend) || !is_integer(*p)) { + if(!(fmt & chars_format::fixed)) { + // We are in error. + return answer; + } + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + if(neg_exp) { exp_number = - exp_number; } + exponent += exp_number; + } + } else { + // If it scientific and not fixed, we have to bail out. + if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } + } + answer.lastmatch = p; + answer.valid = true; + + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon. + // + // We can deal with up to 19 digits. + if (digit_count > 19) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + // We need to be mindful of the case where we only have zeroes... + // E.g., 0.000000000...000. + const char *start = start_digits; + while ((start != pend) && (*start == '0' || *start == decimal_point)) { + if(*start == '0') { digit_count --; } + start++; + } + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + const char* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + while((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = end_of_integer_part - p + exp_number; + } else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + const char* frac_end = p + answer.fraction.len(); + while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } + } + answer.exponent = exponent; + answer.mantissa = i; + return answer; +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_FAST_TABLE_H +#define FASTFLOAT_FAST_TABLE_H + +//included above: +//#include + +namespace fast_float { + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + +/** + * The smallest non-zero float (binary64) is 2^−1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^−1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +template +struct powers_template { + +constexpr static int smallest_power_of_five = binary_format::smallest_power_of_ten(); +constexpr static int largest_power_of_five = binary_format::largest_power_of_ten(); +constexpr static int number_of_entries = 2 * (largest_power_of_five - smallest_power_of_five + 1); +// Powers of five from 5^-342 all the way to 5^308 rounded toward one. 
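+// Added clarification (not from the original sources), a worked instance of
+// the factorization described above: for q = 3, 10^3 = 2^3 * 5^3 = 8 * 125,
+// and the 2^3 factor only shifts the binary exponent, so only 5^3 = 125 can
+// affect the significand. Each row of the table below appears to hold the
+// leading 128 bits of the corresponding power of five (high 64-bit word
+// first), normalized so that the top bit of the high word is set.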
+static const uint64_t power_of_five_128[number_of_entries]; +}; + +template +const uint64_t powers_template::power_of_five_128[number_of_entries] = { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 
0xc4ce17b399107c22,0xcb550fb4384d21d3, + 0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 
0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 
0xfea126b7d78186bc,0xe2f610c84987bfa8, + 0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a819e, + 0xf79687aed3eec551,0x3a83ddbd83f52205, + 0x9abe14cd44753b52,0xc4926a9672793543, + 0xc16d9a0095928a27,0x75b7053c0f178294, + 0xf1c90080baf72cb1,0x5324c68b12dd6339, + 0x971da05074da7bee,0xd3f6fc16ebca5e04, + 0xbce5086492111aea,0x88f4bb1ca6bcf585, + 0xec1e4a7db69561a5,0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07,0x3aff322e62439fd0, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 
0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 
0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 
0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 
0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; +using powers = powers_template<>; + +} + +#endif + + +#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H +#define FASTFLOAT_DECIMAL_TO_BINARY_H + +//included above: +//#include +#include +#include +//included above: +//#include +#include +//included above: +//#include + +namespace fast_float { + +// This will compute or rather approximate w * 5**q and return a pair of 64-bit words approximating +// the result, with the "high" part corresponding to the most significant bits and the +// low part corresponding to the least significant bits. +// +template +fastfloat_really_inline +value128 compute_product_approximation(int64_t q, uint64_t w) { + const int index = 2 * int(q - powers::smallest_power_of_five); + // For small values of q, e.g., q in [0,27], the answer is always exact because + // The line value128 firstproduct = full_multiplication(w, power_of_five_128[index]); + // gives the exact answer. + value128 firstproduct = full_multiplication(w, powers::power_of_five_128[index]); + static_assert((bit_precision >= 0) && (bit_precision <= 64), " precision should be in (0,64]"); + constexpr uint64_t precision_mask = (bit_precision < 64) ? + (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) + : uint64_t(0xFFFFFFFFFFFFFFFF); + if((firstproduct.high & precision_mask) == precision_mask) { // could further guard with (lower + w < lower) + // regarding the second product, we only need secondproduct.high, but our expectation is that the compiler will optimize this extra work away if needed. 
+ value128 secondproduct = full_multiplication(w, powers::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + return firstproduct; +} + +namespace detail { +/** + * For q in (0,350), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * floor(p) + q + * where + * p = log(5**q)/log(2) = q * log(5)/log(2) + * + * For negative values of q in (-400,0), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * -ceil(p) + q + * where + * p = log(5**-q)/log(2) = -q * log(5)/log(2) + */ + constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { + return (((152170 + 65536) * q) >> 16) + 63; + } +} // namespace detail + +// create an adjusted mantissa, biased by the invalid power2 +// for significant digits already multiplied by 10 ** q. +template +fastfloat_really_inline +adjusted_mantissa compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { + int hilz = int(w >> 63) ^ 1; + adjusted_mantissa answer; + answer.mantissa = w << hilz; + int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); + answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + invalid_am_bias); + return answer; +} + +// w * 10 ** q, without rounding the representation up. +// the power2 in the exponent will be adjusted by invalid_am_bias. +template +fastfloat_really_inline +adjusted_mantissa compute_error(int64_t q, uint64_t w) noexcept { + int lz = leading_zeroes(w); + w <<= lz; + value128 product = compute_product_approximation(q, w); + return compute_error_scaled(q, product.high, lz); +} + +// w * 10 ** q +// The returned value should be a valid ieee64 number that simply need to be packed. +// However, in some very rare cases, the computation will fail. In such cases, we +// return an adjusted_mantissa with a negative power of 2: the caller should recompute +// in such cases. +template +fastfloat_really_inline +adjusted_mantissa compute_float(int64_t q, uint64_t w) noexcept { + adjusted_mantissa answer; + if ((w == 0) || (q < binary::smallest_power_of_ten())) { + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + if (q > binary::largest_power_of_ten()) { + // we want to get infinity: + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + // At this point in time q is in [powers::smallest_power_of_five, powers::largest_power_of_five]. + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(w); + w <<= lz; + + // The required precision is binary::mantissa_explicit_bits() + 3 because + // 1. We need the implicit bit + // 2. We need an extra bit for rounding purposes + // 3. We might lose a bit due to the "upperbit" routine (result too small, requiring a shift) + + value128 product = compute_product_approximation(q, w); + if(product.low == 0xFFFFFFFFFFFFFFFF) { // could guard it further + // In some very rare cases, this could happen, in which case we might need a more accurate + // computation that what we can provide cheaply. This is very, very unlikely. + // + const bool inside_safe_exponent = (q >= -27) && (q <= 55); // always good because 5**q <2**128 when q>=0, + // and otherwise, for q<0, we have 5**-q<2**64 and the 128-bit reciprocal allows for exact computation. 
+ if(!inside_safe_exponent) { + return compute_error_scaled(q, product.high, lz); + } + } + // The "compute_product_approximation" function can be slightly slower than a branchless approach: + // value128 product = compute_product(q, w); + // but in practice, we can win big with the compute_product_approximation if its additional branch + // is easily predicted. Which is best is data specific. + int upperbit = int(product.high >> 63); + + answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); + + answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - binary::minimum_exponent()); + if (answer.power2 <= 0) { // we have a subnormal? + // Here have that answer.power2 <= 0 so -answer.power2 >= 0 + if(-answer.power2 + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + // next line is safe because -answer.power2 + 1 < 64 + answer.mantissa >>= -answer.power2 + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + answer.power2 = (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) ? 0 : 1; + return answer; + } + + // usually, we round *up*, but if we fall right in between and and we have an + // even basis, we need to round down + // We are only concerned with the cases where 5**q fits in single 64-bit word. + if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && (q <= binary::max_exponent_round_to_even()) && + ((answer.mantissa & 3) == 1) ) { // we may fall between two floats! + // To be in-between two floats we need that in doing + // answer.mantissa = product.high >> (upperbit + 64 - binary::mantissa_explicit_bits() - 3); + // ... we dropped out only zeroes. But if this happened, then we can go back!!! 
+ if((answer.mantissa << (upperbit + 64 - binary::mantissa_explicit_bits() - 3)) == product.high) { + answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up + } + } + + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { + answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); + answer.power2++; // undo previous addition + } + + answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); + if (answer.power2 >= binary::infinite_power()) { // infinity + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + } + return answer; +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_BIGINT_H +#define FASTFLOAT_BIGINT_H + +#include +//included above: +//#include +//included above: +//#include +//included above: +//#include + + +namespace fast_float { + +// the limb width: we want efficient multiplication of double the bits in +// limb, or for 64-bit limbs, at least 64-bit multiplication where we can +// extract the high and low parts efficiently. this is every 64-bit +// architecture except for sparc, which emulates 128-bit multiplication. +// we might have platforms where `CHAR_BIT` is not 8, so let's avoid +// doing `8 * sizeof(limb)`. +#if defined(FASTFLOAT_64BIT) && !defined(__sparc) +#define FASTFLOAT_64BIT_LIMB +typedef uint64_t limb; +constexpr size_t limb_bits = 64; +#else +#define FASTFLOAT_32BIT_LIMB +typedef uint32_t limb; +constexpr size_t limb_bits = 32; +#endif + +typedef span limb_span; + +// number of bits in a bigint. this needs to be at least the number +// of bits required to store the largest bigint, which is +// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or +// ~3600 bits, so we round to 4000. +constexpr size_t bigint_bits = 4000; +constexpr size_t bigint_limbs = bigint_bits / limb_bits; + +// vector-like type that is allocated on the stack. the entire +// buffer is pre-allocated, and only the length changes. +template +struct stackvec { + limb data[size]; + // we never need more than 150 limbs + uint16_t length{0}; + + stackvec() = default; + stackvec(const stackvec &) = delete; + stackvec &operator=(const stackvec &) = delete; + stackvec(stackvec &&) = delete; + stackvec &operator=(stackvec &&other) = delete; + + // create stack vector from existing limb span. + stackvec(limb_span s) { + FASTFLOAT_ASSERT(try_extend(s)); + } + + limb& operator[](size_t index) noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + const limb& operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + // index from the end of the container + const limb& rindex(size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + size_t rindex = length - index - 1; + return data[rindex]; + } + + // set the length, without bounds checking. 
+ void set_len(size_t len) noexcept { + length = uint16_t(len); + } + constexpr size_t len() const noexcept { + return length; + } + constexpr bool is_empty() const noexcept { + return length == 0; + } + constexpr size_t capacity() const noexcept { + return size; + } + // append item to vector, without bounds checking + void push_unchecked(limb value) noexcept { + data[length] = value; + length++; + } + // append item to vector, returning if item was added + bool try_push(limb value) noexcept { + if (len() < capacity()) { + push_unchecked(value); + return true; + } else { + return false; + } + } + // add items to the vector, from a span, without bounds checking + void extend_unchecked(limb_span s) noexcept { + limb* ptr = data + length; + ::memcpy((void*)ptr, (const void*)s.ptr, sizeof(limb) * s.len()); + set_len(len() + s.len()); + } + // try to add items to the vector, returning if items were added + bool try_extend(limb_span s) noexcept { + if (len() + s.len() <= capacity()) { + extend_unchecked(s); + return true; + } else { + return false; + } + } + // resize the vector, without bounds checking + // if the new size is longer than the vector, assign value to each + // appended item. + void resize_unchecked(size_t new_len, limb value) noexcept { + if (new_len > len()) { + size_t count = new_len - len(); + limb* first = data + len(); + limb* last = first + count; + ::std::fill(first, last, value); + set_len(new_len); + } else { + set_len(new_len); + } + } + // try to resize the vector, returning if the vector was resized. + bool try_resize(size_t new_len, limb value) noexcept { + if (new_len > capacity()) { + return false; + } else { + resize_unchecked(new_len, value); + return true; + } + } + // check if any limbs are non-zero after the given index. + // this needs to be done in reverse order, since the index + // is relative to the most significant limbs. + bool nonzero(size_t index) const noexcept { + while (index < len()) { + if (rindex(index) != 0) { + return true; + } + index++; + } + return false; + } + // normalize the big integer, so most-significant zero limbs are removed. + void normalize() noexcept { + while (len() > 0 && rindex(0) == 0) { + length--; + } + } +}; + +fastfloat_really_inline +uint64_t empty_hi64(bool& truncated) noexcept { + truncated = false; + return 0; +} + +fastfloat_really_inline +uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept { + truncated = false; + int shl = leading_zeroes(r0); + return r0 << shl; +} + +fastfloat_really_inline +uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { + int shl = leading_zeroes(r0); + if (shl == 0) { + truncated = r1 != 0; + return r0; + } else { + int shr = 64 - shl; + truncated = (r1 << shl) != 0; + return (r0 << shl) | (r1 >> shr); + } +} + +fastfloat_really_inline +uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept { + return uint64_hi64(r0, truncated); +} + +fastfloat_really_inline +uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + return uint64_hi64((x0 << 32) | x1, truncated); +} + +fastfloat_really_inline +uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + uint64_t x2 = r2; + return uint64_hi64(x0, (x1 << 32) | x2, truncated); +} + +// add two small integers, checking for overflow. +// we want an efficient operation. for msvc, where +// we don't have built-in intrinsics, this is still +// pretty fast. 
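+// For example (illustrative): in 8-bit arithmetic 200 + 100 wraps to 44, and the check (z < x) detects the wrap because 44 < 200; the same reasoning applies to the 32-bit and 64-bit limbs used here.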
+fastfloat_really_inline +limb scalar_add(limb x, limb y, bool& overflow) noexcept { + limb z; + +// gcc and clang +#if defined(__has_builtin) + #if __has_builtin(__builtin_add_overflow) + overflow = __builtin_add_overflow(x, y, &z); + return z; + #endif +#endif + + // generic, this still optimizes correctly on MSVC. + z = x + y; + overflow = z < x; + return z; +} + +// multiply two small integers, getting both the high and low bits. +fastfloat_really_inline +limb scalar_mul(limb x, limb y, limb& carry) noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + #if defined(__SIZEOF_INT128__) + // GCC and clang both define it as an extension. + __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); + carry = limb(z >> limb_bits); + return limb(z); + #else + // fallback, no native 128-bit integer multiplication with carry. + // on msvc, this optimizes identically, somehow. + value128 z = full_multiplication(x, y); + bool overflow; + z.low = scalar_add(z.low, carry, overflow); + z.high += uint64_t(overflow); // cannot overflow + carry = z.high; + return z.low; + #endif +#else + uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#endif +} + +// add scalar value to bigint starting from offset. +// used in grade school multiplication +template +inline bool small_add_from(stackvec& vec, limb y, size_t start) noexcept { + size_t index = start; + limb carry = y; + bool overflow; + while (carry != 0 && index < vec.len()) { + vec[index] = scalar_add(vec[index], carry, overflow); + carry = limb(overflow); + index += 1; + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add scalar value to bigint. +template +fastfloat_really_inline bool small_add(stackvec& vec, limb y) noexcept { + return small_add_from(vec, y, 0); +} + +// multiply bigint by scalar value. +template +inline bool small_mul(stackvec& vec, limb y) noexcept { + limb carry = 0; + for (size_t index = 0; index < vec.len(); index++) { + vec[index] = scalar_mul(vec[index], y, carry); + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add bigint to bigint starting from index. +// used in grade school multiplication +template +bool large_add_from(stackvec& x, limb_span y, size_t start) noexcept { + // the effective x buffer is from `xstart..x.len()`, so exit early + // if we can't get that current range. + if (x.len() < start || y.len() > x.len() - start) { + FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); + } + + bool carry = false; + for (size_t index = 0; index < y.len(); index++) { + limb xi = x[index + start]; + limb yi = y[index]; + bool c1 = false; + bool c2 = false; + xi = scalar_add(xi, yi, c1); + if (carry) { + xi = scalar_add(xi, 1, c2); + } + x[index + start] = xi; + carry = c1 | c2; + } + + // handle overflow + if (carry) { + FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); + } + return true; +} + +// add bigint to bigint. 
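+// (convenience overload: forwards to the three-argument large_add_from with a start offset of 0)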
+template +fastfloat_really_inline bool large_add_from(stackvec& x, limb_span y) noexcept { + return large_add_from(x, y, 0); +} + +// grade-school multiplication algorithm +template +bool long_mul(stackvec& x, limb_span y) noexcept { + limb_span xs = limb_span(x.data, x.len()); + stackvec z(xs); + limb_span zs = limb_span(z.data, z.len()); + + if (y.len() != 0) { + limb y0 = y[0]; + FASTFLOAT_TRY(small_mul(x, y0)); + for (size_t index = 1; index < y.len(); index++) { + limb yi = y[index]; + stackvec zi; + if (yi != 0) { + // re-use the same buffer throughout + zi.set_len(0); + FASTFLOAT_TRY(zi.try_extend(zs)); + FASTFLOAT_TRY(small_mul(zi, yi)); + limb_span zis = limb_span(zi.data, zi.len()); + FASTFLOAT_TRY(large_add_from(x, zis, index)); + } + } + } + + x.normalize(); + return true; +} + +// grade-school multiplication algorithm +template +bool large_mul(stackvec& x, limb_span y) noexcept { + if (y.len() == 1) { + FASTFLOAT_TRY(small_mul(x, y[0])); + } else { + FASTFLOAT_TRY(long_mul(x, y)); + } + return true; +} + +// big integer type. implements a small subset of big integer +// arithmetic, using simple algorithms since asymptotically +// faster algorithms are slower for a small number of limbs. +// all operations assume the big-integer is normalized. +struct bigint { + // storage of the limbs, in little-endian order. + stackvec vec; + + bigint(): vec() {} + bigint(const bigint &) = delete; + bigint &operator=(const bigint &) = delete; + bigint(bigint &&) = delete; + bigint &operator=(bigint &&other) = delete; + + bigint(uint64_t value): vec() { +#ifdef FASTFLOAT_64BIT_LIMB + vec.push_unchecked(value); +#else + vec.push_unchecked(uint32_t(value)); + vec.push_unchecked(uint32_t(value >> 32)); +#endif + vec.normalize(); + } + + // get the high 64 bits from the vector, and if bits were truncated. + // this is to get the significant digits for the float. + uint64_t hi64(bool& truncated) const noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint64_hi64(vec.rindex(0), truncated); + } else { + uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); + truncated |= vec.nonzero(2); + return result; + } +#else + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint32_hi64(vec.rindex(0), truncated); + } else if (vec.len() == 2) { + return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); + } else { + uint64_t result = uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); + truncated |= vec.nonzero(3); + return result; + } +#endif + } + + // compare two big integers, returning the large value. + // assumes both are normalized. if the return value is + // negative, other is larger, if the return value is + // positive, this is larger, otherwise they are equal. + // the limbs are stored in little-endian order, so we + // must compare the limbs in ever order. + int compare(const bigint& other) const noexcept { + if (vec.len() > other.vec.len()) { + return 1; + } else if (vec.len() < other.vec.len()) { + return -1; + } else { + for (size_t index = vec.len(); index > 0; index--) { + limb xi = vec[index - 1]; + limb yi = other.vec[index - 1]; + if (xi > yi) { + return 1; + } else if (xi < yi) { + return -1; + } + } + return 0; + } + } + + // shift left each limb n bits, carrying over to the new limb + // returns true if we were able to shift all the digits. 
+ bool shl_bits(size_t n) noexcept { + // Internally, for each item, we shift left by n, and add the previous + // right shifted limb-bits. + // For example, we transform (for u8) shifted left 2, to: + // b10100100 b01000010 + // b10 b10010001 b00001000 + FASTFLOAT_DEBUG_ASSERT(n != 0); + FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); + + size_t shl = n; + size_t shr = limb_bits - shl; + limb prev = 0; + for (size_t index = 0; index < vec.len(); index++) { + limb xi = vec[index]; + vec[index] = (xi << shl) | (prev >> shr); + prev = xi; + } + + limb carry = prev >> shr; + if (carry != 0) { + return vec.try_push(carry); + } + return true; + } + + // move the limbs left by `n` limbs. + bool shl_limbs(size_t n) noexcept { + FASTFLOAT_DEBUG_ASSERT(n != 0); + if (n + vec.len() > vec.capacity()) { + return false; + } else if (!vec.is_empty()) { + // move limbs + limb* dst = vec.data + n; + const limb* src = vec.data; + ::memmove(dst, src, sizeof(limb) * vec.len()); + // fill in empty limbs + limb* first = vec.data; + limb* last = first + n; + ::std::fill(first, last, 0); + vec.set_len(n + vec.len()); + return true; + } else { + return true; + } + } + + // move the limbs left by `n` bits. + bool shl(size_t n) noexcept { + size_t rem = n % limb_bits; + size_t div = n / limb_bits; + if (rem != 0) { + FASTFLOAT_TRY(shl_bits(rem)); + } + if (div != 0) { + FASTFLOAT_TRY(shl_limbs(div)); + } + return true; + } + + // get the number of leading zeros in the bigint. + int ctlz() const noexcept { + if (vec.is_empty()) { + return 0; + } else { +#ifdef FASTFLOAT_64BIT_LIMB + return leading_zeroes(vec.rindex(0)); +#else + // no use defining a specialized leading_zeroes for a 32-bit type. + uint64_t r0 = vec.rindex(0); + return leading_zeroes(r0 << 32); +#endif + } + } + + // get the number of bits in the bigint. + int bit_length() const noexcept { + int lz = ctlz(); + return int(limb_bits * vec.len()) - lz; + } + + bool mul(limb y) noexcept { + return small_mul(vec, y); + } + + bool add(limb y) noexcept { + return small_add(vec, y); + } + + // multiply as if by 2 raised to a power. + bool pow2(uint32_t exp) noexcept { + return shl(exp); + } + + // multiply as if by 5 raised to a power. 
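+  // For example (64-bit limbs, illustrative): pow5(300) multiplies by the precomputed 5^135 twice, by 5^27 once, and finally by 5^3 = 125 from the small table.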
+ bool pow5(uint32_t exp) noexcept { + // multiply by a power of 5 + static constexpr uint32_t large_step = 135; + static constexpr uint64_t small_power_of_5[] = { + 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, + 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, + 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, + 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, + 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, + 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, + }; +#ifdef FASTFLOAT_64BIT_LIMB + constexpr static limb large_power_of_5[] = { + 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, + 10482974169319127550UL, 198276706040285095UL}; +#else + constexpr static limb large_power_of_5[] = { + 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, + 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; +#endif + size_t large_length = sizeof(large_power_of_5) / sizeof(limb); + limb_span large = limb_span(large_power_of_5, large_length); + while (exp >= large_step) { + FASTFLOAT_TRY(large_mul(vec, large)); + exp -= large_step; + } +#ifdef FASTFLOAT_64BIT_LIMB + uint32_t small_step = 27; + limb max_native = 7450580596923828125UL; +#else + uint32_t small_step = 13; + limb max_native = 1220703125U; +#endif + while (exp >= small_step) { + FASTFLOAT_TRY(small_mul(vec, max_native)); + exp -= small_step; + } + if (exp != 0) { + FASTFLOAT_TRY(small_mul(vec, limb(small_power_of_5[exp]))); + } + + return true; + } + + // multiply as if by 10 raised to a power. + bool pow10(uint32_t exp) noexcept { + FASTFLOAT_TRY(pow5(exp)); + return pow2(exp); + } +}; + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_ASCII_NUMBER_H +#define FASTFLOAT_ASCII_NUMBER_H + +//included above: +//#include +//included above: +//#include +//included above: +//#include +//included above: +//#include + + +namespace fast_float { + +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. +fastfloat_really_inline bool is_integer(char c) noexcept { return c >= '0' && c <= '9'; } + +fastfloat_really_inline uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 + | (val & 0x00FF000000000000) >> 40 + | (val & 0x0000FF0000000000) >> 24 + | (val & 0x000000FF00000000) >> 8 + | (val & 0x00000000FF000000) << 8 + | (val & 0x0000000000FF0000) << 24 + | (val & 0x000000000000FF00) << 40 + | (val & 0x00000000000000FF) << 56; +} + +fastfloat_really_inline uint64_t read_u64(const char *chars) { + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. + val = byteswap(val); +#endif + return val; +} + +fastfloat_really_inline void write_u64(uint8_t *chars, uint64_t val) { +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. 
+ val = byteswap(val); +#endif + ::memcpy(chars, &val, sizeof(uint64_t)); +} + +// credit @aqrit +fastfloat_really_inline uint32_t parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + +fastfloat_really_inline uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { + return parse_eight_digits_unrolled(read_u64(chars)); +} + +// credit @aqrit +fastfloat_really_inline bool is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); +} + +fastfloat_really_inline bool is_made_of_eight_digits_fast(const char *chars) noexcept { + return is_made_of_eight_digits_fast(read_u64(chars)); +} + +typedef span byte_span; + +struct parsed_number_string { + int64_t exponent{0}; + uint64_t mantissa{0}; + const char *lastmatch{nullptr}; + bool negative{false}; + bool valid{false}; + bool too_many_digits{false}; + // contains the range of the significant digits + byte_span integer{}; // non-nullable + byte_span fraction{}; // nullable +}; + +// Assuming that you use no more than 19 digits, this will +// parse an ASCII string. +fastfloat_really_inline +parsed_number_string parse_number_string(const char *p, const char *pend, parse_options options) noexcept { + const chars_format fmt = options.format; + const char decimal_point = options.decimal_point; + + parsed_number_string answer; + answer.valid = false; + answer.too_many_digits = false; + answer.negative = (*p == '-'); + if (*p == '-') { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here + ++p; + if (p == pend) { + return answer; + } + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } + } + const char *const start_digits = p; + + uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad) + + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + p += 8; + } + while ((p != pend) && is_integer(*p)) { + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + + uint64_t(*p - '0'); // might overflow, we will handle the overflow later + ++p; + } + const char *const end_of_integer_part = p; + int64_t digit_count = int64_t(end_of_integer_part - start_digits); + answer.integer = byte_span(start_digits, size_t(digit_count)); + int64_t exponent = 0; + if ((p != pend) && (*p == decimal_point)) { + ++p; + const char* before = p; + // can occur at most twice without overflowing, but let it occur more, since + // for integers with many digits, digit parsing is the primary bottleneck. 
+ while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok + p += 8; + } + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + } + exponent = before - p; + answer.fraction = byte_span(before, size_t(p - before)); + digit_count -= exponent; + } + // we must have encountered at least one integer! + if (digit_count == 0) { + return answer; + } + int64_t exp_number = 0; // explicit exponential part + if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) { + const char * location_of_e = p; + ++p; + bool neg_exp = false; + if ((p != pend) && ('-' == *p)) { + neg_exp = true; + ++p; + } else if ((p != pend) && ('+' == *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + ++p; + } + if ((p == pend) || !is_integer(*p)) { + if(!(fmt & chars_format::fixed)) { + // We are in error. + return answer; + } + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + if(neg_exp) { exp_number = - exp_number; } + exponent += exp_number; + } + } else { + // If it scientific and not fixed, we have to bail out. + if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; } + } + answer.lastmatch = p; + answer.valid = true; + + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon. + // + // We can deal with up to 19 digits. + if (digit_count > 19) { // this is uncommon + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + // We need to be mindful of the case where we only have zeroes... + // E.g., 0.000000000...000. + const char *start = start_digits; + while ((start != pend) && (*start == '0' || *start == decimal_point)) { + if(*start == '0') { digit_count --; } + start++; + } + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + const char* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + while((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = end_of_integer_part - p + exp_number; + } else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + const char* frac_end = p + answer.fraction.len(); + while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - '0'); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } + } + answer.exponent = exponent; + answer.mantissa = i; + return answer; +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_DIGIT_COMPARISON_H +#define FASTFLOAT_DIGIT_COMPARISON_H + +//included above: +//#include +//included above: +//#include +//included above: +//#include +//included above: +//#include + + +namespace fast_float { + +// 1e0 to 1e19 +constexpr static uint64_t powers_of_ten_uint64[] = { + 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL, + 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL, + 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL, + 1000000000000000000UL, 10000000000000000000UL}; + +// calculate the exponent, in scientific notation, of the number. +// this algorithm is not even close to optimized, but it has no practical +// effect on performance: in order to have a faster algorithm, we'd need +// to slow down performance for faster algorithms, and this is still fast. +fastfloat_really_inline int32_t scientific_exponent(parsed_number_string& num) noexcept { + uint64_t mantissa = num.mantissa; + int32_t exponent = int32_t(num.exponent); + while (mantissa >= 10000) { + mantissa /= 10000; + exponent += 4; + } + while (mantissa >= 100) { + mantissa /= 100; + exponent += 2; + } + while (mantissa >= 10) { + mantissa /= 10; + exponent += 1; + } + return exponent; +} + +// this converts a native floating-point number to an extended-precision float. +template +fastfloat_really_inline adjusted_mantissa to_extended(T value) noexcept { + adjusted_mantissa am; + int32_t bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); + if (std::is_same::value) { + constexpr uint32_t exponent_mask = 0x7F800000; + constexpr uint32_t mantissa_mask = 0x007FFFFF; + constexpr uint64_t hidden_bit_mask = 0x00800000; + uint32_t bits; + ::memcpy(&bits, &value, sizeof(T)); + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } + } else { + constexpr uint64_t exponent_mask = 0x7FF0000000000000; + constexpr uint64_t mantissa_mask = 0x000FFFFFFFFFFFFF; + constexpr uint64_t hidden_bit_mask = 0x0010000000000000; + uint64_t bits; + ::memcpy(&bits, &value, sizeof(T)); + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } + } + + return am; +} + +// get the extended precision value of the halfway point between b and b+u. +// we are given a native float that represents b, so we need to adjust it +// halfway between b and b+u. 
+template +fastfloat_really_inline adjusted_mantissa to_extended_halfway(T value) noexcept { + adjusted_mantissa am = to_extended(value); + am.mantissa <<= 1; + am.mantissa += 1; + am.power2 -= 1; + return am; +} + +// round an extended-precision float to the nearest machine float. +template +fastfloat_really_inline void round(adjusted_mantissa& am, callback cb) noexcept { + int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + if (-am.power2 >= mantissa_shift) { + // have a denormal float + int32_t shift = -am.power2 + 1; + cb(am, std::min(shift, 64)); + // check for round-up: if rounding-nearest carried us to the hidden bit. + am.power2 = (am.mantissa < (uint64_t(1) << binary_format::mantissa_explicit_bits())) ? 0 : 1; + return; + } + + // have a normal float, use the default shift. + cb(am, mantissa_shift); + + // check for carry + if (am.mantissa >= (uint64_t(2) << binary_format::mantissa_explicit_bits())) { + am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); + am.power2++; + } + + // check for infinite: we could have carried to an infinite power + am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); + if (am.power2 >= binary_format::infinite_power()) { + am.power2 = binary_format::infinite_power(); + am.mantissa = 0; + } +} + +template +fastfloat_really_inline +void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept { + uint64_t mask; + uint64_t halfway; + if (shift == 64) { + mask = UINT64_MAX; + } else { + mask = (uint64_t(1) << shift) - 1; + } + if (shift == 0) { + halfway = 0; + } else { + halfway = uint64_t(1) << (shift - 1); + } + uint64_t truncated_bits = am.mantissa & mask; + uint64_t is_above = truncated_bits > halfway; + uint64_t is_halfway = truncated_bits == halfway; + + // shift digits into position + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; + + bool is_odd = (am.mantissa & 1) == 1; + am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); +} + +fastfloat_really_inline void round_down(adjusted_mantissa& am, int32_t shift) noexcept { + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; +} + +fastfloat_really_inline void skip_zeros(const char*& first, const char* last) noexcept { + uint64_t val; + while (std::distance(first, last) >= 8) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != 0x3030303030303030) { + break; + } + first += 8; + } + while (first != last) { + if (*first != '0') { + break; + } + first++; + } +} + +// determine if any non-zero digits were truncated. +// all characters must be valid digits. +fastfloat_really_inline bool is_truncated(const char* first, const char* last) noexcept { + // do 8-bit optimizations, can just compare to 8 literal 0s. 
+ uint64_t val; + while (std::distance(first, last) >= 8) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != 0x3030303030303030) { + return true; + } + first += 8; + } + while (first != last) { + if (*first != '0') { + return true; + } + first++; + } + return false; +} + +fastfloat_really_inline bool is_truncated(byte_span s) noexcept { + return is_truncated(s.ptr, s.ptr + s.len()); +} + +fastfloat_really_inline +void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + counter += 8; + count += 8; +} + +fastfloat_really_inline +void parse_one_digit(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { + value = value * 10 + limb(*p - '0'); + p++; + counter++; + count++; +} + +fastfloat_really_inline +void add_native(bigint& big, limb power, limb value) noexcept { + big.mul(power); + big.add(value); +} + +fastfloat_really_inline void round_up_bigint(bigint& big, size_t& count) noexcept { + // need to round-up the digits, but need to avoid rounding + // ....9999 to ...10000, which could cause a false halfway point. + add_native(big, 10, 1); + count++; +} + +// parse the significant digits into a big integer +inline void parse_mantissa(bigint& result, parsed_number_string& num, size_t max_digits, size_t& digits) noexcept { + // try to minimize the number of big integer and scalar multiplication. + // therefore, try to parse 8 digits at a time, and multiply by the largest + // scalar value (9 or 19 digits) for each step. + size_t counter = 0; + digits = 0; + limb value = 0; +#ifdef FASTFLOAT_64BIT_LIMB + size_t step = 19; +#else + size_t step = 9; +#endif + + // process all integer digits. + const char* p = num.integer.ptr; + const char* pend = p + num.integer.len(); + skip_zeros(p, pend); + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (num.fraction.ptr != nullptr) { + truncated |= is_truncated(num.fraction); + } + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + + // add our fraction digits, if they're available. 
+ if (num.fraction.ptr != nullptr) { + p = num.fraction.ptr; + pend = p + num.fraction.len(); + if (digits == 0) { + skip_zeros(p, pend); + } + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + } + + if (counter != 0) { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + } +} + +template +inline adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept { + FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); + adjusted_mantissa answer; + bool truncated; + answer.mantissa = bigmant.hi64(truncated); + int bias = binary_format::mantissa_explicit_bits() - binary_format::minimum_exponent(); + answer.power2 = bigmant.bit_length() - 64 + bias; + + round(answer, [truncated](adjusted_mantissa& a, int32_t shift) { + round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { + return is_above || (is_halfway && truncated) || (is_odd && is_halfway); + }); + }); + + return answer; +} + +// the scaling here is quite simple: we have, for the real digits `m * 10^e`, +// and for the theoretical digits `n * 2^f`. Since `e` is always negative, +// to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`. +// we then need to scale by `2^(f- e)`, and then the two significant digits +// are of the same magnitude. +template +inline adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept { + bigint& real_digits = bigmant; + int32_t real_exp = exponent; + + // get the value of `b`, rounded down, and get a bigint representation of b+h + adjusted_mantissa am_b = am; + // gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type. + round(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); }); + T b; + to_float(false, am_b, b); + adjusted_mantissa theor = to_extended_halfway(b); + bigint theor_digits(theor.mantissa); + int32_t theor_exp = theor.power2; + + // scale real digits and theor digits to be same power. 
+ int32_t pow2_exp = theor_exp - real_exp; + uint32_t pow5_exp = uint32_t(-real_exp); + if (pow5_exp != 0) { + FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); + } + if (pow2_exp > 0) { + FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); + } else if (pow2_exp < 0) { + FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); + } + + // compare digits, and use it to director rounding + int ord = real_digits.compare(theor_digits); + adjusted_mantissa answer = am; + round(answer, [ord](adjusted_mantissa& a, int32_t shift) { + round_nearest_tie_even(a, shift, [ord](bool is_odd, bool _, bool __) -> bool { + (void)_; // not needed, since we've done our comparison + (void)__; // not needed, since we've done our comparison + if (ord > 0) { + return true; + } else if (ord < 0) { + return false; + } else { + return is_odd; + } + }); + }); + + return answer; +} + +// parse the significant digits as a big integer to unambiguously round the +// the significant digits. here, we are trying to determine how to round +// an extended float representation close to `b+h`, halfway between `b` +// (the float rounded-down) and `b+u`, the next positive float. this +// algorithm is always correct, and uses one of two approaches. when +// the exponent is positive relative to the significant digits (such as +// 1234), we create a big-integer representation, get the high 64-bits, +// determine if any lower bits are truncated, and use that to direct +// rounding. in case of a negative exponent relative to the significant +// digits (such as 1.2345), we create a theoretical representation of +// `b` as a big-integer type, scaled to the same binary exponent as +// the actual digits. we then compare the big integer representations +// of both, and use that to direct rounding. +template +inline adjusted_mantissa digit_comp(parsed_number_string& num, adjusted_mantissa am) noexcept { + // remove the invalid exponent bias + am.power2 -= invalid_am_bias; + + int32_t sci_exp = scientific_exponent(num); + size_t max_digits = binary_format::max_digits(); + size_t digits = 0; + bigint bigmant; + parse_mantissa(bigmant, num, max_digits, digits); + // can't underflow, since digits is at most max_digits. + int32_t exponent = sci_exp + 1 - int32_t(digits); + if (exponent >= 0) { + return positive_digit_comp(bigmant, exponent); + } else { + return negative_digit_comp(bigmant, am, exponent); + } +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_PARSE_NUMBER_H +#define FASTFLOAT_PARSE_NUMBER_H + + +//included above: +//#include +//included above: +//#include +//included above: +//#include +//included above: +//#include + +namespace fast_float { + + +namespace detail { +/** + * Special case +inf, -inf, nan, infinity, -infinity. + * The case comparisons could be made much faster given that we know that the + * strings a null-free and fixed. + **/ +template +from_chars_result parse_infnan(const char *first, const char *last, T &value) noexcept { + from_chars_result answer; + answer.ptr = first; + answer.ec = std::errc(); // be optimistic + bool minusSign = false; + if (*first == '-') { // assume first < last, so dereference without checks; C++17 20.19.3.(7.1) explicitly forbids '+' here + minusSign = true; + ++first; + } + if (last - first >= 3) { + if (fastfloat_strncasecmp(first, "nan", 3)) { + answer.ptr = (first += 3); + value = minusSign ? -std::numeric_limits::quiet_NaN() : std::numeric_limits::quiet_NaN(); + // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, C11 7.20.1.3.3. 
At least MSVC produces nan(ind) and nan(snan). + if(first != last && *first == '(') { + for(const char* ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == ')') { + answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) + break; + } + else if(!(('a' <= *ptr && *ptr <= 'z') || ('A' <= *ptr && *ptr <= 'Z') || ('0' <= *ptr && *ptr <= '9') || *ptr == '_')) + break; // forbidden char, not nan(n-char-seq-opt) + } + } + return answer; + } + if (fastfloat_strncasecmp(first, "inf", 3)) { + if ((last - first >= 8) && fastfloat_strncasecmp(first + 3, "inity", 5)) { + answer.ptr = first + 8; + } else { + answer.ptr = first + 3; + } + value = minusSign ? -std::numeric_limits::infinity() : std::numeric_limits::infinity(); + return answer; + } + } + answer.ec = std::errc::invalid_argument; + return answer; +} + +} // namespace detail + +template +from_chars_result from_chars(const char *first, const char *last, + T &value, chars_format fmt /*= chars_format::general*/) noexcept { + return from_chars_advanced(first, last, value, parse_options{fmt}); +} + +template +from_chars_result from_chars_advanced(const char *first, const char *last, + T &value, parse_options options) noexcept { + + static_assert (std::is_same::value || std::is_same::value, "only float and double are supported"); + + + from_chars_result answer; + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string pns = parse_number_string(first, last, options); + if (!pns.valid) { + return detail::parse_infnan(first, last, value); + } + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + // Next is Clinger's fast path. + if (binary_format::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format::max_exponent_fast_path() && pns.mantissa <=binary_format::max_mantissa_fast_path() && !pns.too_many_digits) { + value = T(pns.mantissa); + if (pns.exponent < 0) { value = value / binary_format::exact_power_of_ten(-pns.exponent); } + else { value = value * binary_format::exact_power_of_ten(pns.exponent); } + if (pns.negative) { value = -value; } + return answer; + } + adjusted_mantissa am = compute_float>(pns.exponent, pns.mantissa); + if(pns.too_many_digits && am.power2 >= 0) { + if(am != compute_float>(pns.exponent, pns.mantissa + 1)) { + am = compute_error>(pns.exponent, pns.mantissa); + } + } + // If we called compute_float>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0), + // then we need to go the long way around again. This is very uncommon. 
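/* Editor's note: a standalone sketch (not part of this file) of the Clinger fast
 * path taken above: when the parsed mantissa fits in the double's 53-bit
 * significand and the decimal exponent is within +/-22, both the mantissa and
 * 10^|e| are exact doubles, so a single correctly-rounded multiply or divide
 * already gives the right answer. The bounds and the helper name are
 * illustrative; the real limits come from binary_format. */
#include <cassert>
#include <cstdint>

static bool clinger_fast_path(uint64_t mantissa, int exp10, double& out)
{
    static const double pow10[] = {
        1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
        1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
    if (mantissa > (UINT64_C(1) << 53) || exp10 < -22 || exp10 > 22)
        return false;                          // defer to the slower paths
    double v = static_cast<double>(mantissa);  // exact: mantissa <= 2^53
    out = exp10 < 0 ? v / pow10[-exp10] : v * pow10[exp10];
    return true;
}

int main()
{
    double v = 0.0;
    assert(clinger_fast_path(12345, -2, v) && v == 123.45);  // "123.45"
    assert(!clinger_fast_path(1, 100, v));                   // exponent out of range
}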
+ if(am.power2 < 0) { am = digit_comp(pns, am); } + to_float(pns.negative, am, value); + return answer; +} + +} // namespace fast_float + +#endif + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) || defined(__APPLE_CC__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif // _C4_EXT_FAST_FLOAT_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/vector_fwd.hpp +// https://github.com/biojppm/c4core/src/c4/std/vector_fwd.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_VECTOR_FWD_HPP_ +#define _C4_STD_VECTOR_FWD_HPP_ + +/** @file vector_fwd.hpp */ + +//included above: +//#include + +// forward declarations for std::vector +#if defined(__GLIBCXX__) || defined(__GLIBCPP__) || defined(_MSC_VER) +#if defined(_MSC_VER) +__pragma(warning(push)) +__pragma(warning(disable : 4643)) +#endif +namespace std { +template class allocator; +template class vector; +} // namespace std +#if defined(_MSC_VER) +__pragma(warning(pop)) +#endif +#elif defined(_LIBCPP_ABI_NAMESPACE) +namespace std { +inline namespace _LIBCPP_ABI_NAMESPACE { +template class allocator; +template class vector; +} // namespace _LIBCPP_ABI_NAMESPACE +} // namespace std +#else +#error "unknown standard library" +#endif + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp +//#include "c4/substr_fwd.hpp" +#if !defined(C4_SUBSTR_FWD_HPP_) && !defined(_C4_SUBSTR_FWD_HPP_) +#error "amalgamate: file c4/substr_fwd.hpp must have been included at this point" +#endif /* C4_SUBSTR_FWD_HPP_ */ + +#endif + +namespace c4 { + +template c4::substr to_substr(std::vector &vec); +template c4::csubstr to_csubstr(std::vector const& vec); + +template bool operator!= (c4::csubstr ss, std::vector const& s); +template bool operator== (c4::csubstr ss, std::vector const& s); +template bool operator>= (c4::csubstr ss, std::vector const& s); +template bool operator> (c4::csubstr ss, std::vector const& s); +template bool operator<= (c4::csubstr ss, std::vector const& s); +template bool operator< (c4::csubstr ss, std::vector const& s); + +template bool operator!= (std::vector const& s, c4::csubstr ss); +template bool operator== (std::vector const& s, c4::csubstr ss); +template bool operator>= (std::vector const& s, c4::csubstr ss); +template bool operator> (std::vector const& s, c4::csubstr ss); +template bool operator<= (std::vector const& s, c4::csubstr ss); +template bool operator< (std::vector const& s, c4::csubstr ss); + +template size_t to_chars(c4::substr buf, std::vector const& s); +template bool from_chars(c4::csubstr buf, std::vector * s); + +} // namespace c4 + +#endif // _C4_STD_VECTOR_FWD_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/std/vector_fwd.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/string_fwd.hpp +// https://github.com/biojppm/c4core/src/c4/std/string_fwd.hpp +//-------------------------------------------------------------------------------- 
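/* Editor's note: usage sketch for the from_chars() entry points defined above.
 * Finite values, "inf"/"infinity" and "nan[(n-char-seq)]" are all accepted; on
 * failure, ec is std::errc::invalid_argument. Shown against the upstream
 * fast_float single header (https://github.com/fastfloat/fast_float); in this
 * amalgamation the same functions live in the embedded fast_float namespace. */
#include <cassert>
#include <cmath>
#include <cstring>
#include <system_error>
#include "fast_float/fast_float.h"   // upstream header, for a standalone build

static double parse(const char* s)
{
    double value = 0.0;
    fast_float::from_chars_result r = fast_float::from_chars(s, s + std::strlen(s), value);
    assert(r.ec == std::errc());
    return value;
}

int main()
{
    assert(parse("123.45") == 123.45);
    assert(parse("-1e-3") == -0.001);
    assert(std::isinf(parse("-infinity")));
    assert(std::isnan(parse("nan(ind)")));
}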
+//******************************************************************************** + +#ifndef _C4_STD_STRING_FWD_HPP_ +#define _C4_STD_STRING_FWD_HPP_ + +/** @file string_fwd.hpp */ + +#ifndef DOXYGEN + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp +//#include "c4/substr_fwd.hpp" +#if !defined(C4_SUBSTR_FWD_HPP_) && !defined(_C4_SUBSTR_FWD_HPP_) +#error "amalgamate: file c4/substr_fwd.hpp must have been included at this point" +#endif /* C4_SUBSTR_FWD_HPP_ */ + +#endif + +//included above: +//#include + +// forward declarations for std::string +#if defined(__GLIBCXX__) || defined(__GLIBCPP__) +#include // use the fwd header in glibcxx +#elif defined(_LIBCPP_VERSION) || defined(__APPLE_CC__) +#include // use the fwd header in stdlibc++ +#elif defined(_MSC_VER) +//! @todo is there a fwd header in msvc? +namespace std { +template struct char_traits; +template class allocator; +template class basic_string; +using string = basic_string, allocator>; +} /* namespace std */ +#else +#error "unknown standard library" +#endif + +namespace c4 { + +C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept; +C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept; + +bool operator== (c4::csubstr ss, std::string const& s); +bool operator!= (c4::csubstr ss, std::string const& s); +bool operator>= (c4::csubstr ss, std::string const& s); +bool operator> (c4::csubstr ss, std::string const& s); +bool operator<= (c4::csubstr ss, std::string const& s); +bool operator< (c4::csubstr ss, std::string const& s); + +bool operator== (std::string const& s, c4::csubstr ss); +bool operator!= (std::string const& s, c4::csubstr ss); +bool operator>= (std::string const& s, c4::csubstr ss); +bool operator> (std::string const& s, c4::csubstr ss); +bool operator<= (std::string const& s, c4::csubstr ss); +bool operator< (std::string const& s, c4::csubstr ss); + +size_t to_chars(c4::substr buf, std::string const& s); +bool from_chars(c4::csubstr buf, std::string * s); + +} // namespace c4 + +#endif // DOXYGEN +#endif // _C4_STD_STRING_FWD_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/std/string_fwd.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/std_fwd.hpp +// https://github.com/biojppm/c4core/src/c4/std/std_fwd.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_STD_FWD_HPP_ +#define _C4_STD_STD_FWD_HPP_ + +/** @file std_fwd.hpp includes all c4-std interop fwd files */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/std/vector_fwd.hpp +//#include "c4/std/vector_fwd.hpp" +#if !defined(C4_STD_VECTOR_FWD_HPP_) && !defined(_C4_STD_VECTOR_FWD_HPP_) +#error "amalgamate: file c4/std/vector_fwd.hpp must have been included at this point" +#endif /* C4_STD_VECTOR_FWD_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/std/string_fwd.hpp +//#include "c4/std/string_fwd.hpp" +#if !defined(C4_STD_STRING_FWD_HPP_) && !defined(_C4_STD_STRING_FWD_HPP_) +#error "amalgamate: file c4/std/string_fwd.hpp must have been included at this point" +#endif /* C4_STD_STRING_FWD_HPP_ */ + +//#include "c4/std/tuple_fwd.hpp" + +#endif // _C4_STD_STD_FWD_HPP_ + + +// (end 
https://github.com/biojppm/c4core/src/c4/std/std_fwd.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/charconv.hpp +// https://github.com/biojppm/c4core/src/c4/charconv.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_CHARCONV_HPP_ +#define _C4_CHARCONV_HPP_ + +/** @file charconv.hpp Lightweight generic type-safe wrappers for + * converting individual values to/from strings. + * + * These are the main functions: + * + * @code{.cpp} + * // Convert the given value, writing into the string. + * // The resulting string will NOT be null-terminated. + * // Return the number of characters needed. + * // This function is safe to call when the string is too small - + * // no writes will occur beyond the string's last character. + * template size_t c4::to_chars(substr buf, T const& C4_RESTRICT val); + * + * + * // Convert the given value to a string using to_chars(), and + * // return the resulting string, up to and including the last + * // written character. + * template substr c4::to_chars_sub(substr buf, T const& C4_RESTRICT val); + * + * + * // Read a value from the string, which must be + * // trimmed to the value (ie, no leading/trailing whitespace). + * // return true if the conversion succeeded. + * // There is no check for overflow; the value wraps around in a way similar + * // to the standard C/C++ overflow behavior. For example, + * // from_chars("128", &val) returns true and val will be + * // set tot 0. + * template bool c4::from_chars(csubstr buf, T * C4_RESTRICT val); + * + * + * // Read the first valid sequence of characters from the string, + * // skipping leading whitespace, and convert it using from_chars(). + * // Return the number of characters read for converting. 
+ * template size_t c4::from_chars_first(csubstr buf, T * C4_RESTRICT val); + * @endcode + */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + +//included above: +//#include +//included above: +//#include +//included above: +//#include +//included above: +//#include +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//#include "c4/substr.hpp" +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/std/std_fwd.hpp +//#include "c4/std/std_fwd.hpp" +#if !defined(C4_STD_STD_FWD_HPP_) && !defined(_C4_STD_STD_FWD_HPP_) +#error "amalgamate: file c4/std/std_fwd.hpp must have been included at this point" +#endif /* C4_STD_STD_FWD_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_util.hpp +//#include "c4/memory_util.hpp" +#if !defined(C4_MEMORY_UTIL_HPP_) && !defined(_C4_MEMORY_UTIL_HPP_) +#error "amalgamate: file c4/memory_util.hpp must have been included at this point" +#endif /* C4_MEMORY_UTIL_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/szconv.hpp +//#include "c4/szconv.hpp" +#if !defined(C4_SZCONV_HPP_) && !defined(_C4_SZCONV_HPP_) +#error "amalgamate: file c4/szconv.hpp must have been included at this point" +#endif /* C4_SZCONV_HPP_ */ + + +#ifndef C4CORE_NO_FAST_FLOAT +# if (C4_CPP >= 17) +# if defined(_MSC_VER) +# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros +# include +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 0 // prefer fast_float with MSVC +# define C4CORE_HAVE_FAST_FLOAT 1 +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# endif +# else +# if __has_include() +//included above: +//# include +# if defined(__cpp_lib_to_chars) +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 0 // glibc uses fast_float internally +# define C4CORE_HAVE_FAST_FLOAT 1 +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# endif +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# endif +# endif +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 1 +# endif +# if C4CORE_HAVE_FAST_FLOAT + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wsign-conversion") + C4_SUPPRESS_WARNING_GCC("-Warray-bounds") +# if __GNUC__ >= 5 + C4_SUPPRESS_WARNING_GCC("-Wshift-count-overflow") +# endif +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/ext/fast_float.hpp +//# include "c4/ext/fast_float.hpp" +#if !defined(C4_EXT_FAST_FLOAT_HPP_) && !defined(_C4_EXT_FAST_FLOAT_HPP_) 
+#error "amalgamate: file c4/ext/fast_float.hpp must have been included at this point" +#endif /* C4_EXT_FAST_FLOAT_HPP_ */ + + C4_SUPPRESS_WARNING_GCC_POP +# endif +#elif (C4_CPP >= 17) +# define C4CORE_HAVE_FAST_FLOAT 0 +# if defined(_MSC_VER) +# if (C4_MSVC_VERSION >= C4_MSVC_VERSION_2019) // VS2017 and lower do not have these macros +//included above: +//# include +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 1 +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# endif +# else +# if __has_include() +//included above: +//# include +# if defined(__cpp_lib_to_chars) +# define C4CORE_HAVE_STD_TOCHARS 1 +# define C4CORE_HAVE_STD_FROMCHARS 1 // glibc uses fast_float internally +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# endif +# else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# endif +# endif +#else +# define C4CORE_HAVE_STD_TOCHARS 0 +# define C4CORE_HAVE_STD_FROMCHARS 0 +# define C4CORE_HAVE_FAST_FLOAT 0 +#endif + + +#if !C4CORE_HAVE_STD_FROMCHARS +#include +#endif + + +#ifdef _MSC_VER +# pragma warning(push) +# if C4_MSVC_VERSION != C4_MSVC_VERSION_2017 +# pragma warning(disable: 4800) //'int': forcing value to bool 'true' or 'false' (performance warning) +# endif +# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtautological-constant-out-of-range-compare" +# pragma clang diagnostic ignored "-Wformat-nonliteral" +# pragma clang diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# pragma GCC diagnostic ignored "-Wdouble-promotion" // implicit conversion increases floating-point precision +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif + + +namespace c4 { + +#if C4CORE_HAVE_STD_TOCHARS +/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. */ +typedef enum : std::underlying_type::type { + /** print the real number in floating point format (like %f) */ + FTOA_FLOAT = static_cast::type>(std::chars_format::fixed), + /** print the real number in scientific format (like %e) */ + FTOA_SCIENT = static_cast::type>(std::chars_format::scientific), + /** print the real number in flexible format (like %g) */ + FTOA_FLEX = static_cast::type>(std::chars_format::general), + /** print the real number in hexadecimal format (like %a) */ + FTOA_HEXA = static_cast::type>(std::chars_format::hex), +} RealFormat_e; +#else +/** @warning Use only the symbol. Do not rely on the type or naked value of this enum. 
*/ +typedef enum : char { + /** print the real number in floating point format (like %f) */ + FTOA_FLOAT = 'f', + /** print the real number in scientific format (like %e) */ + FTOA_SCIENT = 'e', + /** print the real number in flexible format (like %g) */ + FTOA_FLEX = 'g', + /** print the real number in hexadecimal format (like %a) */ + FTOA_HEXA = 'a', +} RealFormat_e; +#endif + + +/** in some platforms, int,unsigned int + * are not any of int8_t...int64_t and + * long,unsigned long are not any of uint8_t...uint64_t */ +template +struct is_fixed_length +{ + enum : bool { + /** true if T is one of the fixed length signed types */ + value_i = (std::is_integral::value + && (std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value)), + /** true if T is one of the fixed length unsigned types */ + value_u = (std::is_integral::value + && (std::is_same::value + || std::is_same::value + || std::is_same::value + || std::is_same::value)), + /** true if T is one of the fixed length signed or unsigned types */ + value = value_i || value_u + }; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#ifdef _MSC_VER +# pragma warning(push) +#elif defined(__clang__) +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wconversion" +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +namespace detail { + +/* python command to get the values below: +def dec(v): + return str(v) +for bits in (8, 16, 32, 64): + imin, imax, umax = (-(1 << (bits - 1))), (1 << (bits - 1)) - 1, (1 << bits) - 1 + for vname, v in (("imin", imin), ("imax", imax), ("umax", umax)): + for f in (bin, oct, dec, hex): + print(f"{bits}b: {vname}={v} {f.__name__}: len={len(f(v)):2d}: {v} {f(v)}") +*/ + +// do not use the type as the template argument because in some +// platforms long!=int32 and long!=int64. Just use the numbytes +// which is more generic and spares lengthy SFINAE code. +template struct charconv_digits_; +template using charconv_digits = charconv_digits_::value>; + +template<> struct charconv_digits_<1u, true> // int8_t +{ + enum : size_t { + maxdigits_bin = 1 + 2 + 8, // -128==-0b10000000 + maxdigits_oct = 1 + 2 + 3, // -128==-0o200 + maxdigits_dec = 1 + 3, // -128 + maxdigits_hex = 1 + 2 + 2, // -128==-0x80 + maxdigits_bin_nopfx = 8, // -128==-0b10000000 + maxdigits_oct_nopfx = 3, // -128==-0o200 + maxdigits_dec_nopfx = 3, // -128 + maxdigits_hex_nopfx = 2, // -128==-0x80 + }; + // min values without sign! 
+ static constexpr csubstr min_value_dec() noexcept { return csubstr("128"); } + static constexpr csubstr min_value_hex() noexcept { return csubstr("80"); } + static constexpr csubstr min_value_oct() noexcept { return csubstr("200"); } + static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000"); } + static constexpr csubstr max_value_dec() noexcept { return csubstr("127"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 3) || (str.len == 3 && str[0] <= '1')); } +}; +template<> struct charconv_digits_<1u, false> // uint8_t +{ + enum : size_t { + maxdigits_bin = 2 + 8, // 255 0b11111111 + maxdigits_oct = 2 + 3, // 255 0o377 + maxdigits_dec = 3, // 255 + maxdigits_hex = 2 + 2, // 255 0xff + maxdigits_bin_nopfx = 8, // 255 0b11111111 + maxdigits_oct_nopfx = 3, // 255 0o377 + maxdigits_dec_nopfx = 3, // 255 + maxdigits_hex_nopfx = 2, // 255 0xff + }; + static constexpr csubstr max_value_dec() noexcept { return csubstr("255"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 3) || (str.len == 3 && str[0] <= '3')); } +}; +template<> struct charconv_digits_<2u, true> // int16_t +{ + enum : size_t { + maxdigits_bin = 1 + 2 + 16, // -32768 -0b1000000000000000 + maxdigits_oct = 1 + 2 + 6, // -32768 -0o100000 + maxdigits_dec = 1 + 5, // -32768 -32768 + maxdigits_hex = 1 + 2 + 4, // -32768 -0x8000 + maxdigits_bin_nopfx = 16, // -32768 -0b1000000000000000 + maxdigits_oct_nopfx = 6, // -32768 -0o100000 + maxdigits_dec_nopfx = 5, // -32768 -32768 + maxdigits_hex_nopfx = 4, // -32768 -0x8000 + }; + // min values without sign! + static constexpr csubstr min_value_dec() noexcept { return csubstr("32768"); } + static constexpr csubstr min_value_hex() noexcept { return csubstr("8000"); } + static constexpr csubstr min_value_oct() noexcept { return csubstr("100000"); } + static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000"); } + static constexpr csubstr max_value_dec() noexcept { return csubstr("32767"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 6)); } +}; +template<> struct charconv_digits_<2u, false> // uint16_t +{ + enum : size_t { + maxdigits_bin = 2 + 16, // 65535 0b1111111111111111 + maxdigits_oct = 2 + 6, // 65535 0o177777 + maxdigits_dec = 6, // 65535 65535 + maxdigits_hex = 2 + 4, // 65535 0xffff + maxdigits_bin_nopfx = 16, // 65535 0b1111111111111111 + maxdigits_oct_nopfx = 6, // 65535 0o177777 + maxdigits_dec_nopfx = 6, // 65535 65535 + maxdigits_hex_nopfx = 4, // 65535 0xffff + }; + static constexpr csubstr max_value_dec() noexcept { return csubstr("65535"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 6) || (str.len == 6 && str[0] <= '1')); } +}; +template<> struct charconv_digits_<4u, true> // int32_t +{ + enum : size_t { + maxdigits_bin = 1 + 2 + 32, // len=35: -2147483648 -0b10000000000000000000000000000000 + maxdigits_oct = 1 + 2 + 11, // len=14: -2147483648 -0o20000000000 + maxdigits_dec = 1 + 10, // len=11: -2147483648 -2147483648 + maxdigits_hex = 1 + 2 + 8, // len=11: -2147483648 -0x80000000 + maxdigits_bin_nopfx = 32, // len=35: -2147483648 -0b10000000000000000000000000000000 + maxdigits_oct_nopfx = 11, // len=14: -2147483648 -0o20000000000 + maxdigits_dec_nopfx = 10, // len=11: -2147483648 -2147483648 + maxdigits_hex_nopfx = 8, // len=11: -2147483648 -0x80000000 + }; + // min values without sign! 
+ static constexpr csubstr min_value_dec() noexcept { return csubstr("2147483648"); } + static constexpr csubstr min_value_hex() noexcept { return csubstr("80000000"); } + static constexpr csubstr min_value_oct() noexcept { return csubstr("20000000000"); } + static constexpr csubstr min_value_bin() noexcept { return csubstr("10000000000000000000000000000000"); } + static constexpr csubstr max_value_dec() noexcept { return csubstr("2147483647"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 11) || (str.len == 11 && str[0] <= '1')); } +}; +template<> struct charconv_digits_<4u, false> // uint32_t +{ + enum : size_t { + maxdigits_bin = 2 + 32, // len=34: 4294967295 0b11111111111111111111111111111111 + maxdigits_oct = 2 + 11, // len=13: 4294967295 0o37777777777 + maxdigits_dec = 10, // len=10: 4294967295 4294967295 + maxdigits_hex = 2 + 8, // len=10: 4294967295 0xffffffff + maxdigits_bin_nopfx = 32, // len=34: 4294967295 0b11111111111111111111111111111111 + maxdigits_oct_nopfx = 11, // len=13: 4294967295 0o37777777777 + maxdigits_dec_nopfx = 10, // len=10: 4294967295 4294967295 + maxdigits_hex_nopfx = 8, // len=10: 4294967295 0xffffffff + }; + static constexpr csubstr max_value_dec() noexcept { return csubstr("4294967295"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 11) || (str.len == 11 && str[0] <= '3')); } +}; +template<> struct charconv_digits_<8u, true> // int32_t +{ + enum : size_t { + maxdigits_bin = 1 + 2 + 64, // len=67: -9223372036854775808 -0b1000000000000000000000000000000000000000000000000000000000000000 + maxdigits_oct = 1 + 2 + 22, // len=25: -9223372036854775808 -0o1000000000000000000000 + maxdigits_dec = 1 + 19, // len=20: -9223372036854775808 -9223372036854775808 + maxdigits_hex = 1 + 2 + 16, // len=19: -9223372036854775808 -0x8000000000000000 + maxdigits_bin_nopfx = 64, // len=67: -9223372036854775808 -0b1000000000000000000000000000000000000000000000000000000000000000 + maxdigits_oct_nopfx = 22, // len=25: -9223372036854775808 -0o1000000000000000000000 + maxdigits_dec_nopfx = 19, // len=20: -9223372036854775808 -9223372036854775808 + maxdigits_hex_nopfx = 16, // len=19: -9223372036854775808 -0x8000000000000000 + }; + static constexpr csubstr min_value_dec() noexcept { return csubstr("9223372036854775808"); } + static constexpr csubstr min_value_hex() noexcept { return csubstr("8000000000000000"); } + static constexpr csubstr min_value_oct() noexcept { return csubstr("1000000000000000000000"); } + static constexpr csubstr min_value_bin() noexcept { return csubstr("1000000000000000000000000000000000000000000000000000000000000000"); } + static constexpr csubstr max_value_dec() noexcept { return csubstr("9223372036854775807"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 22)); } +}; +template<> struct charconv_digits_<8u, false> +{ + enum : size_t { + maxdigits_bin = 2 + 64, // len=66: 18446744073709551615 0b1111111111111111111111111111111111111111111111111111111111111111 + maxdigits_oct = 2 + 22, // len=24: 18446744073709551615 0o1777777777777777777777 + maxdigits_dec = 20, // len=20: 18446744073709551615 18446744073709551615 + maxdigits_hex = 2 + 16, // len=18: 18446744073709551615 0xffffffffffffffff + maxdigits_bin_nopfx = 64, // len=66: 18446744073709551615 0b1111111111111111111111111111111111111111111111111111111111111111 + maxdigits_oct_nopfx = 22, // len=24: 18446744073709551615 0o1777777777777777777777 + maxdigits_dec_nopfx = 20, // len=20: 
18446744073709551615 18446744073709551615 + maxdigits_hex_nopfx = 16, // len=18: 18446744073709551615 0xffffffffffffffff + }; + static constexpr csubstr max_value_dec() noexcept { return csubstr("18446744073709551615"); } + static constexpr bool is_oct_overflow(csubstr str) noexcept { return !((str.len < 22) || (str.len == 22 && str[0] <= '1')); } +}; +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// Helper macros, undefined below +#define _c4append(c) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = static_cast(c); } else { ++pos; } } +#define _c4appendhex(i) { if(C4_LIKELY(pos < buf.len)) { buf.str[pos++] = hexchars[i]; } else { ++pos; } } + +/** @name digits_dec return the number of digits required to encode a + * decimal number. + * + * @note At first sight this code may look heavily branchy and + * therefore inefficient. However, measurements revealed this to be + * the fastest among the alternatives. + * + * @see https://github.com/biojppm/c4core/pull/77 */ +/** @{ */ + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE +auto digits_dec(T v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + return ((v >= 100) ? 3u : ((v >= 10) ? 2u : 1u)); +} + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE +auto digits_dec(T v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + return ((v >= 10000) ? 5u : (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u); +} + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE +auto digits_dec(T v) noexcept + -> typename std::enable_if::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + return ((v >= 1000000000) ? 10u : (v >= 100000000) ? 9u : (v >= 10000000) ? 8u : + (v >= 1000000) ? 7u : (v >= 100000) ? 6u : (v >= 10000) ? 5u : + (v >= 1000) ? 4u : (v >= 100) ? 3u : (v >= 10) ? 2u : 1u); +} + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE +auto digits_dec(T v) noexcept + -> typename std::enable_if::type +{ + // thanks @fargies!!! + // https://github.com/biojppm/c4core/pull/77#issuecomment-1063753568 + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + if(v >= 1000000000) // 10 + { + if(v >= 100000000000000) // 15 [15-20] range + { + if(v >= 100000000000000000) // 18 (15 + (20 - 15) / 2) + { + if((typename std::make_unsigned::type)v >= 10000000000000000000u) // 20 + return 20u; + else + return (v >= 1000000000000000000) ? 19u : 18u; + } + else if(v >= 10000000000000000) // 17 + return 17u; + else + return(v >= 1000000000000000) ? 16u : 15u; + } + else if(v >= 1000000000000) // 13 + return (v >= 10000000000000) ? 14u : 13u; + else if(v >= 100000000000) // 12 + return 12; + else + return(v >= 10000000000) ? 11u : 10u; + } + else if(v >= 10000) // 5 [5-9] range + { + if(v >= 10000000) // 8 + return (v >= 100000000) ? 9u : 8u; + else if(v >= 1000000) // 7 + return 7; + else + return (v >= 100000) ? 6u : 5u; + } + else if(v >= 100) + return (v >= 1000) ? 4u : 3u; + else + return (v >= 10) ? 2u : 1u; +} + +/** @} */ + + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_hex(T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + return v ? 
1u + (msb((typename std::make_unsigned::type)v) >> 2u) : 1u; +} + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_bin(T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + return v ? 1u + msb((typename std::make_unsigned::type)v) : 1u; +} + +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE unsigned digits_oct(T v_) noexcept +{ + // TODO: is there a better way? + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v_ >= 0); + using U = typename + std::conditional::type>::type; + U v = (U) v_; // safe because we require v_ >= 0 + unsigned __n = 1; + const unsigned __b2 = 64u; + const unsigned __b3 = __b2 * 8u; + const unsigned long __b4 = __b3 * 8u; + while(true) + { + if(v < 8u) + return __n; + if(v < __b2) + return __n + 1; + if(v < __b3) + return __n + 2; + if(v < __b4) + return __n + 3; + v /= (U) __b4; + __n += 4; + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { +C4_INLINE_CONSTEXPR const char hexchars[] = "0123456789abcdef"; +C4_INLINE_CONSTEXPR const char digits0099[] = + "0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"; +} // namespace detail + +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_GCC("-Warray-bounds") // gcc has false positives here +#if (defined(__GNUC__) && (__GNUC__ >= 7)) +C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has false positives here +#endif + +template +C4_HOT C4_ALWAYS_INLINE +void write_dec_unchecked(substr buf, T v, unsigned digits_v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + C4_ASSERT(buf.len >= digits_v); + C4_XASSERT(digits_v == digits_dec(v)); + // in bm_xtoa: checkoncelog_singlediv_write2 + while(v >= T(100)) + { + const T quo = v / T(100); + const auto num = (v - quo * T(100)) << 1u; + v = quo; + buf.str[--digits_v] = detail::digits0099[num + 1]; + buf.str[--digits_v] = detail::digits0099[num]; + } + if(v >= T(10)) + { + C4_ASSERT(digits_v == 2); + const auto num = v << 1u; + buf.str[1] = detail::digits0099[num + 1]; + buf.str[0] = detail::digits0099[num]; + } + else + { + C4_ASSERT(digits_v == 1); + buf.str[0] = (char)('0' + v); + } +} + + +template +C4_HOT C4_ALWAYS_INLINE +void write_hex_unchecked(substr buf, T v, unsigned digits_v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + C4_ASSERT(buf.len >= digits_v); + C4_XASSERT(digits_v == digits_hex(v)); + do { + buf.str[--digits_v] = detail::hexchars[v & T(15)]; + v >>= 4; + } while(v); + C4_ASSERT(digits_v == 0); +} + + +template +C4_HOT C4_ALWAYS_INLINE +void write_oct_unchecked(substr buf, T v, unsigned digits_v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + C4_ASSERT(buf.len >= digits_v); + C4_XASSERT(digits_v == digits_oct(v)); + do { + buf.str[--digits_v] = (char)('0' + (v & T(7))); + v >>= 3; + } while(v); + C4_ASSERT(digits_v == 0); +} + + +template +C4_HOT C4_ALWAYS_INLINE +void write_bin_unchecked(substr buf, T v, unsigned digits_v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + C4_ASSERT(buf.len >= digits_v); + C4_XASSERT(digits_v == digits_bin(v)); + do { + buf.str[--digits_v] = (char)('0' + (v & T(1))); + v 
>>= 1; + } while(v); + C4_ASSERT(digits_v == 0); +} + + +/** write an integer to a string in decimal format. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t write_dec(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + unsigned digits = digits_dec(v); + if(C4_LIKELY(buf.len >= digits)) + write_dec_unchecked(buf, v, digits); + return digits; +} + +/** write an integer to a string in hexadecimal format. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not prefix with 0x + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t write_hex(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + unsigned digits = digits_hex(v); + if(C4_LIKELY(buf.len >= digits)) + write_hex_unchecked(buf, v, digits); + return digits; +} + +/** write an integer to a string in octal format. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not prefix with 0o + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t write_oct(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + unsigned digits = digits_oct(v); + if(C4_LIKELY(buf.len >= digits)) + write_oct_unchecked(buf, v, digits); + return digits; +} + +/** write an integer to a string in binary format. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not prefix with 0b + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the required size will be returned + * @return the number of characters required for the buffer. 
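/* Editor's note: a standalone sketch (not part of this file) of the
 * two-digits-per-division trick used by write_dec_unchecked() above: each divide
 * by 100 produces two output characters taken from the "00".."99" table
 * (digits0099), halving the number of divisions of a digit-at-a-time loop.
 * The function name is illustrative only. */
#include <cassert>
#include <cstring>

static void u32_to_dec(char* buf, unsigned ndigits, unsigned v)
{
    static const char table[] =
        "00010203040506070809101112131415161718192021222324"
        "25262728293031323334353637383940414243444546474849"
        "50515253545556575859606162636465666768697071727374"
        "75767778798081828384858687888990919293949596979899";
    unsigned pos = ndigits;           // caller provides the digit count, as above
    while (v >= 100)
    {
        const unsigned rem = (v % 100) * 2;
        v /= 100;
        buf[--pos] = table[rem + 1];
        buf[--pos] = table[rem];
    }
    if (v >= 10)
    {
        buf[--pos] = table[v * 2 + 1];
        buf[--pos] = table[v * 2];
    }
    else
    {
        buf[--pos] = char('0' + v);
    }
}

int main()
{
    char buf[16];
    u32_to_dec(buf, 5, 90210);
    assert(std::memcmp(buf, "90210", 5) == 0);
}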
*/ +template +C4_ALWAYS_INLINE size_t write_bin(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(v >= 0); + unsigned digits = digits_bin(v); + C4_ASSERT(digits > 0); + if(C4_LIKELY(buf.len >= digits)) + write_bin_unchecked(buf, v, digits); + return digits; +} + + +namespace detail { +template using NumberWriter = size_t (*)(substr, U); +template writer> +size_t write_num_digits(substr buf, T v, size_t num_digits) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + size_t ret = writer(buf, v); + if(ret >= num_digits) + return ret; + else if(ret >= buf.len || num_digits > buf.len) + return num_digits; + C4_ASSERT(num_digits >= ret); + size_t delta = static_cast(num_digits - ret); + memmove(buf.str + delta, buf.str, ret); + memset(buf.str, '0', delta); + return num_digits; +} +} // namespace detail + + +/** same as c4::write_dec(), but pad with zeroes on the left + * such that the resulting string is @p num_digits wide. + * If the given number is requires more than num_digits, then the number prevails. */ +template +C4_ALWAYS_INLINE size_t write_dec(substr buf, T val, size_t num_digits) noexcept +{ + return detail::write_num_digits>(buf, val, num_digits); +} + +/** same as c4::write_hex(), but pad with zeroes on the left + * such that the resulting string is @p num_digits wide. + * If the given number is requires more than num_digits, then the number prevails. */ +template +C4_ALWAYS_INLINE size_t write_hex(substr buf, T val, size_t num_digits) noexcept +{ + return detail::write_num_digits>(buf, val, num_digits); +} + +/** same as c4::write_bin(), but pad with zeroes on the left + * such that the resulting string is @p num_digits wide. + * If the given number is requires more than num_digits, then the number prevails. */ +template +C4_ALWAYS_INLINE size_t write_bin(substr buf, T val, size_t num_digits) noexcept +{ + return detail::write_num_digits>(buf, val, num_digits); +} + +/** same as c4::write_oct(), but pad with zeroes on the left + * such that the resulting string is @p num_digits wide. + * If the given number is requires more than num_digits, then the number prevails. */ +template +C4_ALWAYS_INLINE size_t write_oct(substr buf, T val, size_t num_digits) noexcept +{ + return detail::write_num_digits>(buf, val, num_digits); +} + +C4_SUPPRESS_WARNING_GCC_POP + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** read a decimal integer from a string. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note The string must be trimmed. Whitespace is not accepted. + * @note the string must not be empty + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_dec("128", &val)` returns true + * and val will be set to 0 because 127 is the max i8 value. 
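/* Editor's note: a standalone sketch (not part of this file) of the left-padding
 * scheme used by detail::write_num_digits() above: the digits are written first,
 * then shifted right with memmove() and the gap is filled with '0', rather than
 * computing the pad up front. snprintf stands in for write_dec(); names are
 * illustrative only. */
#include <cassert>
#include <cstdio>
#include <cstring>

static size_t write_padded(char* buf, size_t cap, unsigned v, size_t num_digits)
{
    char tmp[16];
    const size_t len = size_t(std::snprintf(tmp, sizeof tmp, "%u", v));
    const size_t needed = len > num_digits ? len : num_digits;
    if (needed > cap)
        return needed;                                     // too small: report required size
    std::memcpy(buf, tmp, len);                            // digits at the start
    if (len < num_digits)
    {
        std::memmove(buf + (num_digits - len), buf, len);  // shift the digits right...
        std::memset(buf, '0', num_digits - len);           // ...and zero-fill the gap
    }
    return needed;
}

int main()
{
    char buf[16];
    const size_t n = write_padded(buf, sizeof buf, 42, 6);
    assert(n == 6 && std::memcmp(buf, "000042", 6) == 0);
}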
+ * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ +template +C4_ALWAYS_INLINE bool read_dec(csubstr s, I *C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(!s.empty()); + *v = 0; + for(char c : s) + { + if(C4_UNLIKELY(c < '0' || c > '9')) + return false; + *v = (*v) * I(10) + (I(c) - I('0')); + } + return true; +} + +/** read an hexadecimal integer from a string. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not accept leading 0x or 0X + * @note the string must not be empty + * @note the string must be trimmed. Whitespace is not accepted. + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_hex("80", &val)` returns true + * and val will be set to 0 because 7f is the max i8 value. + * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ +template +C4_ALWAYS_INLINE bool read_hex(csubstr s, I *C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(!s.empty()); + *v = 0; + for(char c : s) + { + I cv; + if(c >= '0' && c <= '9') + cv = I(c) - I('0'); + else if(c >= 'a' && c <= 'f') + cv = I(10) + (I(c) - I('a')); + else if(c >= 'A' && c <= 'F') + cv = I(10) + (I(c) - I('A')); + else + return false; + *v = (*v) * I(16) + cv; + } + return true; +} + +/** read a binary integer from a string. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not accept leading 0b or 0B + * @note the string must not be empty + * @note the string must be trimmed. Whitespace is not accepted. + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_bin("10000000", &val)` returns true + * and val will be set to 0 because 1111111 is the max i8 value. + * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ +template +C4_ALWAYS_INLINE bool read_bin(csubstr s, I *C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(!s.empty()); + *v = 0; + for(char c : s) + { + *v <<= 1; + if(c == '1') + *v |= 1; + else if(c != '0') + return false; + } + return true; +} + +/** read an octal integer from a string. This is the + * lowest level (and the fastest) function to do this task. + * @note does not accept negative numbers + * @note does not accept leading 0o or 0O + * @note the string must not be empty + * @note the string must be trimmed. Whitespace is not accepted. + * @note there is no check for overflow; the value wraps around + * in a way similar to the standard C/C++ overflow behavior. + * For example, `read_oct("200", &val)` returns true + * and val will be set to 0 because 177 is the max i8 value. 
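/* Editor's note: the readers above intentionally skip overflow detection; the
 * value simply wraps in the destination type. A minimal standalone illustration
 * with an 8-bit unsigned target (300 mod 256 == 44); the function mirrors the
 * shape of read_dec() and its name is illustrative only. */
#include <cassert>
#include <cstdint>

static bool read_dec_u8(const char* s, uint8_t& v)
{
    v = 0;
    for (; *s; ++s)
    {
        if (*s < '0' || *s > '9')
            return false;
        v = uint8_t(v * 10 + unsigned(*s - '0'));   // wraps modulo 256, no range check
    }
    return true;
}

int main()
{
    uint8_t v = 0;
    assert(read_dec_u8("255", v) && v == 255);
    assert(read_dec_u8("300", v) && v == 44);       // wrapped: 300 - 256
    assert(!read_dec_u8("2x", v));                  // non-digit character
}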
+ * @see overflows() to find out if a number string overflows a type range + * @return true if the conversion was successful (no overflow check) */ +template +C4_ALWAYS_INLINE bool read_oct(csubstr s, I *C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_ASSERT(!s.empty()); + *v = 0; + for(char c : s) + { + if(C4_UNLIKELY(c < '0' || c > '7')) + return false; + *v = (*v) * I(8) + (I(c) - I('0')); + } + return true; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { +inline size_t _itoa2buf(substr buf, size_t pos, csubstr val) noexcept +{ + C4_ASSERT(pos + val.len <= buf.len); + memcpy(buf.str + pos, val.str, val.len); + return pos + val.len; +} +inline size_t _itoa2bufwithdigits(substr buf, size_t pos, size_t num_digits, csubstr val) noexcept +{ + num_digits = num_digits > val.len ? num_digits - val.len : 0; + C4_ASSERT(num_digits + val.len <= buf.len); + for(size_t i = 0; i < num_digits; ++i) + _c4append('0'); + return detail::_itoa2buf(buf, pos, val); +} +template +C4_NO_INLINE size_t _itoadec2buf(substr buf) noexcept +{ + using digits_type = detail::charconv_digits; + if(C4_UNLIKELY(buf.len < digits_type::maxdigits_dec)) + return digits_type::maxdigits_dec; + buf.str[0] = '-'; + return detail::_itoa2buf(buf, 1, digits_type::min_value_dec()); +} +template +C4_NO_INLINE size_t _itoa2buf(substr buf, I radix) noexcept +{ + using digits_type = detail::charconv_digits; + size_t pos = 0; + if(C4_LIKELY(buf.len > 0)) + buf.str[pos++] = '-'; + switch(radix) + { + case I(10): + if(C4_UNLIKELY(buf.len < digits_type::maxdigits_dec)) + return digits_type::maxdigits_dec; + pos =_itoa2buf(buf, pos, digits_type::min_value_dec()); + break; + case I(16): + if(C4_UNLIKELY(buf.len < digits_type::maxdigits_hex)) + return digits_type::maxdigits_hex; + buf.str[pos++] = '0'; + buf.str[pos++] = 'x'; + pos = _itoa2buf(buf, pos, digits_type::min_value_hex()); + break; + case I( 2): + if(C4_UNLIKELY(buf.len < digits_type::maxdigits_bin)) + return digits_type::maxdigits_bin; + buf.str[pos++] = '0'; + buf.str[pos++] = 'b'; + pos = _itoa2buf(buf, pos, digits_type::min_value_bin()); + break; + case I( 8): + if(C4_UNLIKELY(buf.len < digits_type::maxdigits_oct)) + return digits_type::maxdigits_oct; + buf.str[pos++] = '0'; + buf.str[pos++] = 'o'; + pos = _itoa2buf(buf, pos, digits_type::min_value_oct()); + break; + } + return pos; +} +template +C4_NO_INLINE size_t _itoa2buf(substr buf, I radix, size_t num_digits) noexcept +{ + using digits_type = detail::charconv_digits; + size_t pos = 0; + size_t needed_digits = 0; + if(C4_LIKELY(buf.len > 0)) + buf.str[pos++] = '-'; + switch(radix) + { + case I(10): + // add 1 to account for - + needed_digits = num_digits+1 > digits_type::maxdigits_dec ? num_digits+1 : digits_type::maxdigits_dec; + if(C4_UNLIKELY(buf.len < needed_digits)) + return needed_digits; + pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_dec()); + break; + case I(16): + // add 3 to account for -0x + needed_digits = num_digits+3 > digits_type::maxdigits_hex ? 
num_digits+3 : digits_type::maxdigits_hex; + if(C4_UNLIKELY(buf.len < needed_digits)) + return needed_digits; + buf.str[pos++] = '0'; + buf.str[pos++] = 'x'; + pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_hex()); + break; + case I( 2): + // add 3 to account for -0b + needed_digits = num_digits+3 > digits_type::maxdigits_bin ? num_digits+3 : digits_type::maxdigits_bin; + if(C4_UNLIKELY(buf.len < needed_digits)) + return needed_digits; + C4_ASSERT(buf.len >= digits_type::maxdigits_bin); + buf.str[pos++] = '0'; + buf.str[pos++] = 'b'; + pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_bin()); + break; + case I( 8): + // add 3 to account for -0o + needed_digits = num_digits+3 > digits_type::maxdigits_oct ? num_digits+3 : digits_type::maxdigits_oct; + if(C4_UNLIKELY(buf.len < needed_digits)) + return needed_digits; + C4_ASSERT(buf.len >= digits_type::maxdigits_oct); + buf.str[pos++] = '0'; + buf.str[pos++] = 'o'; + pos = _itoa2bufwithdigits(buf, pos, num_digits, digits_type::min_value_oct()); + break; + } + return pos; +} +} // namespace detail + + +/** convert an integral signed decimal to a string. + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t itoa(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_signed::value); + if(v >= T(0)) + { + // write_dec() checks the buffer size, so no need to check here + return write_dec(buf, v); + } + // when T is the min value (eg i8: -128), negating it + // will overflow, so treat the min as a special case + else if(C4_LIKELY(v != std::numeric_limits::min())) + { + v = -v; + unsigned digits = digits_dec(v); + if(C4_LIKELY(buf.len >= digits + 1u)) + { + buf.str[0] = '-'; + write_dec_unchecked(buf.sub(1), v, digits); + } + return digits + 1u; + } + return detail::_itoadec2buf(buf); +} + +/** convert an integral signed integer to a string, using a specific + * radix. The radix must be 2, 8, 10 or 16. + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. 
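/* Editor's note: a standalone sketch (not part of this file) of why itoa() above
 * special-cases the minimum value: for a two's-complement type, negating
 * std::numeric_limits<T>::min() overflows (there is no +128 in int8_t), so the
 * minimum is emitted from a precomputed digit string (min_value_dec() above)
 * instead of being negated. Names here are illustrative only. */
#include <cassert>
#include <cstring>
#include <limits>

static size_t i8_to_dec(char* buf, size_t cap, signed char v)
{
    if (v == std::numeric_limits<signed char>::min())
    {
        if (cap >= 4)
            std::memcpy(buf, "-128", 4);   // negating -128 would overflow: copy the literal
        return 4;
    }
    size_t n = 0;
    if (v < 0)
    {
        if (cap > 0)
            buf[0] = '-';
        n = 1;
    }
    unsigned mag = v < 0 ? unsigned(-int(v)) : unsigned(v);  // widen before negating
    char tmp[4];
    size_t d = 0;
    do { tmp[d++] = char('0' + mag % 10); mag /= 10; } while (mag != 0);
    for (size_t i = 0; i < d && n + i < cap; ++i)
        buf[n + i] = tmp[d - 1 - i];
    return n + d;
}

int main()
{
    char buf[8];
    assert(i8_to_dec(buf, sizeof buf, -128) == 4 && std::memcmp(buf, "-128", 4) == 0);
    assert(i8_to_dec(buf, sizeof buf,  127) == 3 && std::memcmp(buf, "127",  3) == 0);
}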
*/ +template +C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix) noexcept +{ + C4_STATIC_ASSERT(std::is_signed::value); + C4_ASSERT(radix == 2 || radix == 8 || radix == 10 || radix == 16); + C4_SUPPRESS_WARNING_GCC_PUSH + #if (defined(__GNUC__) && (__GNUC__ >= 7)) + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has a false positive here + #endif + // when T is the min value (eg i8: -128), negating it + // will overflow, so treat the min as a special case + if(C4_LIKELY(v != std::numeric_limits::min())) + { + unsigned pos = 0; + if(v < 0) + { + v = -v; + if(C4_LIKELY(buf.len > 0)) + buf.str[pos] = '-'; + ++pos; + } + unsigned digits = 0; + switch(radix) + { + case T(10): + digits = digits_dec(v); + if(C4_LIKELY(buf.len >= pos + digits)) + write_dec_unchecked(buf.sub(pos), v, digits); + break; + case T(16): + digits = digits_hex(v); + if(C4_LIKELY(buf.len >= pos + 2u + digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'x'; + write_hex_unchecked(buf.sub(pos + 2), v, digits); + } + digits += 2u; + break; + case T(2): + digits = digits_bin(v); + if(C4_LIKELY(buf.len >= pos + 2u + digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'b'; + write_bin_unchecked(buf.sub(pos + 2), v, digits); + } + digits += 2u; + break; + case T(8): + digits = digits_oct(v); + if(C4_LIKELY(buf.len >= pos + 2u + digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'o'; + write_oct_unchecked(buf.sub(pos + 2), v, digits); + } + digits += 2u; + break; + } + return pos + digits; + } + C4_SUPPRESS_WARNING_GCC_POP + // when T is the min value (eg i8: -128), negating it + // will overflow + return detail::_itoa2buf(buf, radix); +} + + +/** same as c4::itoa(), but pad with zeroes on the left such that the + * resulting string is @p num_digits wide, not accounting for radix + * prefix (0x,0o,0b). The @p radix must be 2, 8, 10 or 16. + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t itoa(substr buf, T v, T radix, size_t num_digits) noexcept +{ + C4_STATIC_ASSERT(std::is_signed::value); + C4_ASSERT(radix == 2 || radix == 8 || radix == 10 || radix == 16); + C4_SUPPRESS_WARNING_GCC_PUSH + #if (defined(__GNUC__) && (__GNUC__ >= 7)) + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow") // gcc has a false positive here + #endif + // when T is the min value (eg i8: -128), negating it + // will overflow, so treat the min as a special case + if(C4_LIKELY(v != std::numeric_limits::min())) + { + unsigned pos = 0; + if(v < 0) + { + v = -v; + if(C4_LIKELY(buf.len > 0)) + buf.str[pos] = '-'; + ++pos; + } + unsigned total_digits = 0; + switch(radix) + { + case T(10): + total_digits = digits_dec(v); + total_digits = pos + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + write_dec(buf.sub(pos), v, num_digits); + break; + case T(16): + total_digits = digits_hex(v); + total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'x'; + write_hex(buf.sub(pos + 2), v, num_digits); + } + break; + case T(2): + total_digits = digits_bin(v); + total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? 
num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'b'; + write_bin(buf.sub(pos + 2), v, num_digits); + } + break; + case T(8): + total_digits = digits_oct(v); + total_digits = pos + 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[pos + 0] = '0'; + buf.str[pos + 1] = 'o'; + write_oct(buf.sub(pos + 2), v, num_digits); + } + break; + } + return total_digits; + } + C4_SUPPRESS_WARNING_GCC_POP + // when T is the min value (eg i8: -128), negating it + // will overflow + return detail::_itoa2buf(buf, radix, num_digits); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** convert an integral unsigned decimal to a string. + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t utoa(substr buf, T v) noexcept +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + // write_dec() does the buffer length check, so no need to check here + return write_dec(buf, v); +} + +/** convert an integral unsigned integer to a string, using a specific + * radix. The radix must be 2, 8, 10 or 16. + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. */ +template +C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix) noexcept +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(radix == 10 || radix == 16 || radix == 2 || radix == 8); + unsigned digits = 0; + switch(radix) + { + case T(10): + digits = digits_dec(v); + if(C4_LIKELY(buf.len >= digits)) + write_dec_unchecked(buf, v, digits); + break; + case T(16): + digits = digits_hex(v); + if(C4_LIKELY(buf.len >= digits+2u)) + { + buf.str[0] = '0'; + buf.str[1] = 'x'; + write_hex_unchecked(buf.sub(2), v, digits); + } + digits += 2u; + break; + case T(2): + digits = digits_bin(v); + if(C4_LIKELY(buf.len >= digits+2u)) + { + buf.str[0] = '0'; + buf.str[1] = 'b'; + write_bin_unchecked(buf.sub(2), v, digits); + } + digits += 2u; + break; + case T(8): + digits = digits_oct(v); + if(C4_LIKELY(buf.len >= digits+2u)) + { + buf.str[0] = '0'; + buf.str[1] = 'o'; + write_oct_unchecked(buf.sub(2), v, digits); + } + digits += 2u; + break; + } + return digits; +} + +/** same as c4::utoa(), but pad with zeroes on the left such that the + * resulting string is @p num_digits wide. The @p radix must be 2, + * 8, 10 or 16. + * + * @note the resulting string is NOT zero-terminated. + * @note it is ok to call this with an empty or too-small buffer; + * no writes will occur, and the needed size will be returned + * @return the number of characters required for the buffer. 
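/* Editor's note: usage sketch for itoa()/utoa() with an explicit radix, assuming a
 * standalone c4core checkout (#include "c4/charconv.hpp"); in this amalgamation the
 * same functions are already in scope. The returned size is what the buffer must
 * hold; the 0x/0o/0b prefix is written, and the output is never zero-terminated. */
#include <cassert>
#include <cstring>
#include "c4/charconv.hpp"

int main()
{
    char arr[32];
    c4::substr buf(arr, sizeof(arr));
    size_t n = c4::utoa(buf, 48879u, 16u);   // 0xbeef
    assert(n == 6 && std::memcmp(arr, "0xbeef", 6) == 0);
    n = c4::itoa(buf, -5, 2);                // negative value, binary radix
    assert(n == 6 && std::memcmp(arr, "-0b101", 6) == 0);
}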
*/ +template +C4_ALWAYS_INLINE size_t utoa(substr buf, T v, T radix, size_t num_digits) noexcept +{ + C4_STATIC_ASSERT(std::is_unsigned::value); + C4_ASSERT(radix == 10 || radix == 16 || radix == 2 || radix == 8); + unsigned total_digits = 0; + switch(radix) + { + case T(10): + total_digits = digits_dec(v); + total_digits = (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + write_dec(buf, v, num_digits); + break; + case T(16): + total_digits = digits_hex(v); + total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[0] = '0'; + buf.str[1] = 'x'; + write_hex(buf.sub(2), v, num_digits); + } + break; + case T(2): + total_digits = digits_bin(v); + total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[0] = '0'; + buf.str[1] = 'b'; + write_bin(buf.sub(2), v, num_digits); + } + break; + case T(8): + total_digits = digits_oct(v); + total_digits = 2u + (unsigned)(num_digits > total_digits ? num_digits : total_digits); + if(C4_LIKELY(buf.len >= total_digits)) + { + buf.str[0] = '0'; + buf.str[1] = 'o'; + write_oct(buf.sub(2), v, num_digits); + } + break; + } + return total_digits; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** Convert a trimmed string to a signed integral value. The input + * string can be formatted as decimal, binary (prefix 0b or 0B), octal + * (prefix 0o or 0O) or hexadecimal (prefix 0x or 0X). Strings with + * leading zeroes are considered as decimal and not octal (unlike the + * C/C++ convention). Every character in the input string is read for + * the conversion; the input string must not contain any leading or + * trailing whitespace. + * + * @return true if the conversion was successful. + * + * @note overflow is not detected: the return status is true even if + * the conversion would return a value outside of the type's range, in + * which case the result will wrap around the type's range. + * This is similar to native behavior. + * + * @note a positive sign is not accepted. ie, the string must not + * start with '+' + * + * @see atoi_first() if the string is not trimmed to the value to read. */ +template +C4_ALWAYS_INLINE bool atoi(csubstr str, T * C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + C4_STATIC_ASSERT(std::is_signed::value); + + if(C4_UNLIKELY(str.len == 0)) + return false; + + C4_ASSERT(str.str[0] != '+'); + + T sign = 1; + size_t start = 0; + if(str.str[0] == '-') + { + if(C4_UNLIKELY(str.len == ++start)) + return false; + sign = -1; + } + + bool parsed_ok = true; + if(str.str[start] != '0') // this should be the common case, so put it first + { + parsed_ok = read_dec(str.sub(start), v); + } + else if(str.len > start + 1) + { + // starts with 0: is it 0x, 0o, 0b? 
+ const char pfx = str.str[start + 1]; + if(pfx == 'x' || pfx == 'X') + parsed_ok = str.len > start + 2 && read_hex(str.sub(start + 2), v); + else if(pfx == 'b' || pfx == 'B') + parsed_ok = str.len > start + 2 && read_bin(str.sub(start + 2), v); + else if(pfx == 'o' || pfx == 'O') + parsed_ok = str.len > start + 2 && read_oct(str.sub(start + 2), v); + else + parsed_ok = read_dec(str.sub(start + 1), v); + } + else + { + parsed_ok = read_dec(str.sub(start), v); + } + if(C4_LIKELY(parsed_ok)) + *v *= sign; + return parsed_ok; +} + + +/** Select the next range of characters in the string that can be parsed + * as a signed integral value, and convert it using atoi(). Leading + * whitespace (space, newline, tabs) is skipped. + * @return the number of characters read for conversion, or csubstr::npos if the conversion failed + * @see atoi() if the string is already trimmed to the value to read. + * @see csubstr::first_int_span() */ +template +C4_ALWAYS_INLINE size_t atoi_first(csubstr str, T * C4_RESTRICT v) +{ + csubstr trimmed = str.first_int_span(); + if(trimmed.len == 0) + return csubstr::npos; + if(atoi(trimmed, v)) + return static_cast(trimmed.end() - str.begin()); + return csubstr::npos; +} + + +//----------------------------------------------------------------------------- + +/** Convert a trimmed string to an unsigned integral value. The string can be + * formatted as decimal, binary (prefix 0b or 0B), octal (prefix 0o or 0O) + * or hexadecimal (prefix 0x or 0X). Every character in the input string is read + * for the conversion; it must not contain any leading or trailing whitespace. + * + * @return true if the conversion was successful. + * + * @note overflow is not detected: the return status is true even if + * the conversion would return a value outside of the type's range, in + * which case the result will wrap around the type's range. + * + * @note If the string has a minus character, the return status + * will be false. + * + * @see atou_first() if the string is not trimmed to the value to read. */ +template +bool atou(csubstr str, T * C4_RESTRICT v) noexcept +{ + C4_STATIC_ASSERT(std::is_integral::value); + + if(C4_UNLIKELY(str.len == 0 || str.front() == '-')) + return false; + + bool parsed_ok = true; + if(str.str[0] != '0') + { + parsed_ok = read_dec(str, v); + } + else + { + if(str.len > 1) + { + const char pfx = str.str[1]; + if(pfx == 'x' || pfx == 'X') + parsed_ok = str.len > 2 && read_hex(str.sub(2), v); + else if(pfx == 'b' || pfx == 'B') + parsed_ok = str.len > 2 && read_bin(str.sub(2), v); + else if(pfx == 'o' || pfx == 'O') + parsed_ok = str.len > 2 && read_oct(str.sub(2), v); + else + parsed_ok = read_dec(str, v); + } + else + { + *v = 0; // we know the first character is 0 + } + } + return parsed_ok; +} + + +/** Select the next range of characters in the string that can be parsed + * as an unsigned integral value, and convert it using atou(). Leading + * whitespace (space, newline, tabs) is skipped. + * @return the number of characters read for conversion, or csubstr::npos if the conversion faileds + * @see atou() if the string is already trimmed to the value to read. 
+ * @see csubstr::first_uint_span() */ +template +C4_ALWAYS_INLINE size_t atou_first(csubstr str, T *v) +{ + csubstr trimmed = str.first_uint_span(); + if(trimmed.len == 0) + return csubstr::npos; + if(atou(trimmed, v)) + return static_cast(trimmed.end() - str.begin()); + return csubstr::npos; +} + + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +namespace detail { +inline bool check_overflow(csubstr str, csubstr limit) noexcept +{ + if(str.len == limit.len) + { + for(size_t i = 0; i < limit.len; ++i) + { + if(str[i] < limit[i]) + return false; + else if(str[i] > limit[i]) + return true; + } + return false; + } + else + return str.len > limit.len; +} +} // namespace detail + + +/** Test if the following string would overflow when converted to associated + * types. + * @return true if number will overflow, false if it fits (or doesn't parse) + */ +template +auto overflows(csubstr str) noexcept + -> typename std::enable_if::value, bool>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + + if(C4_UNLIKELY(str.len == 0)) + { + return false; + } + else if(str.str[0] == '0') + { + if (str.len == 1) + return false; + switch (str.str[1]) + { + case 'x': + case 'X': + { + size_t fno = str.first_not_of('0', 2); + if (fno == csubstr::npos) + return false; + return !(str.len <= fno + (sizeof(T) * 2)); + } + case 'b': + case 'B': + { + size_t fno = str.first_not_of('0', 2); + if (fno == csubstr::npos) + return false; + return !(str.len <= fno +(sizeof(T) * 8)); + } + case 'o': + case 'O': + { + size_t fno = str.first_not_of('0', 2); + if(fno == csubstr::npos) + return false; + return detail::charconv_digits::is_oct_overflow(str.sub(fno)); + } + default: + { + size_t fno = str.first_not_of('0', 1); + if(fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::max_value_dec()); + } + } + } + else if(C4_UNLIKELY(str[0] == '-')) + { + return true; + } + else + { + return detail::check_overflow(str, detail::charconv_digits::max_value_dec()); + } +} + + +/** Test if the following string would overflow when converted to associated + * types. 
+ * @return true if number will overflow, false if it fits (or doesn't parse) + */ +template +auto overflows(csubstr str) + -> typename std::enable_if::value, bool>::type +{ + C4_STATIC_ASSERT(std::is_integral::value); + if(C4_UNLIKELY(str.len == 0)) + return false; + if(str.str[0] == '-') + { + if(str.str[1] == '0') + { + if(str.len == 2) + return false; + switch(str.str[2]) + { + case 'x': + case 'X': + { + size_t fno = str.first_not_of('0', 3); + if (fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::min_value_hex()); + } + case 'b': + case 'B': + { + size_t fno = str.first_not_of('0', 3); + if (fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::min_value_bin()); + } + case 'o': + case 'O': + { + size_t fno = str.first_not_of('0', 3); + if(fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::min_value_oct()); + } + default: + { + size_t fno = str.first_not_of('0', 2); + if(fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::min_value_dec()); + } + } + } + else + return detail::check_overflow(str.sub(1), detail::charconv_digits::min_value_dec()); + } + else if(str.str[0] == '0') + { + if (str.len == 1) + return false; + switch(str.str[1]) + { + case 'x': + case 'X': + { + size_t fno = str.first_not_of('0', 2); + if (fno == csubstr::npos) + return false; + const size_t len = str.len - fno; + return !((len < sizeof (T) * 2) || (len == sizeof(T) * 2 && str[fno] <= '7')); + } + case 'b': + case 'B': + { + size_t fno = str.first_not_of('0', 2); + if (fno == csubstr::npos) + return false; + return !(str.len <= fno + (sizeof(T) * 8 - 1)); + } + case 'o': + case 'O': + { + size_t fno = str.first_not_of('0', 2); + if(fno == csubstr::npos) + return false; + return detail::charconv_digits::is_oct_overflow(str.sub(fno)); + } + default: + { + size_t fno = str.first_not_of('0', 1); + if(fno == csubstr::npos) + return false; + return detail::check_overflow(str.sub(fno), detail::charconv_digits::max_value_dec()); + } + } + } + else + return detail::check_overflow(str, detail::charconv_digits::max_value_dec()); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + + +#if (!C4CORE_HAVE_STD_FROMCHARS) +/** @see http://www.exploringbinary.com/ for many good examples on float-str conversion */ +template +void get_real_format_str(char (& C4_RESTRICT fmt)[N], int precision, RealFormat_e formatting, const char* length_modifier="") +{ + int iret; + if(precision == -1) + iret = snprintf(fmt, sizeof(fmt), "%%%s%c", length_modifier, formatting); + else if(precision == 0) + iret = snprintf(fmt, sizeof(fmt), "%%.%s%c", length_modifier, formatting); + else + iret = snprintf(fmt, sizeof(fmt), "%%.%d%s%c", precision, length_modifier, formatting); + C4_ASSERT(iret >= 2 && size_t(iret) < sizeof(fmt)); + C4_UNUSED(iret); +} + + +/** @todo we're depending on snprintf()/sscanf() for converting to/from + * floating point numbers. Apparently, this increases the binary size + * by a considerable amount. 
There are some lightweight printf + * implementations: + * + * @see http://www.sparetimelabs.com/tinyprintf/tinyprintf.php (BSD) + * @see https://github.com/weiss/c99-snprintf + * @see https://github.com/nothings/stb/blob/master/stb_sprintf.h + * @see http://www.exploringbinary.com/ + * @see https://blog.benoitblanchon.fr/lightweight-float-to-string/ + * @see http://www.ryanjuckett.com/programming/printing-floating-point-numbers/ + */ +template +size_t print_one(substr str, const char* full_fmt, T v) +{ +#ifdef _MSC_VER + /** use _snprintf() to prevent early termination of the output + * for writing the null character at the last position + * @see https://msdn.microsoft.com/en-us/library/2ts7cx93.aspx */ + int iret = _snprintf(str.str, str.len, full_fmt, v); + if(iret < 0) + { + /* when buf.len is not enough, VS returns a negative value. + * so call it again with a negative value for getting an + * actual length of the string */ + iret = snprintf(nullptr, 0, full_fmt, v); + C4_ASSERT(iret > 0); + } + size_t ret = (size_t) iret; + return ret; +#else + int iret = snprintf(str.str, str.len, full_fmt, v); + C4_ASSERT(iret >= 0); + size_t ret = (size_t) iret; + if(ret >= str.len) + ++ret; /* snprintf() reserves the last character to write \0 */ + return ret; +#endif +} +#endif // (!C4CORE_HAVE_STD_FROMCHARS) + + +#if (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT) +/** scans a string using the given type format, while at the same time + * allowing non-null-terminated strings AND guaranteeing that the given + * string length is strictly respected, so that no buffer overflows + * might occur. */ +template +inline size_t scan_one(csubstr str, const char *type_fmt, T *v) +{ + /* snscanf() is absolutely needed here as we must be sure that + * str.len is strictly respected, because substr is + * generally not null-terminated. + * + * Alas, there is no snscanf(). + * + * So we fake it by using a dynamic format with an explicit + * field size set to the length of the given span. + * This trick is taken from: + * https://stackoverflow.com/a/18368910/5875572 */ + + /* this is the actual format we'll use for scanning */ + char fmt[16]; + + /* write the length into it. Eg "%12f". + * Also, get the number of characters read from the string. + * So the final format ends up as "%12f%n"*/ + int iret = std::snprintf(fmt, sizeof(fmt), "%%" "%zu" "%s" "%%n", str.len, type_fmt); + /* no nasty surprises, please! */ + C4_ASSERT(iret >= 0 && size_t(iret) < C4_COUNTOF(fmt)); + + /* now we scan with confidence that the span length is respected */ + int num_chars; + iret = std::sscanf(str.str, fmt, v, &num_chars); + /* scanf returns the number of successful conversions */ + if(iret != 1) return csubstr::npos; + C4_ASSERT(num_chars >= 0); + return (size_t)(num_chars); +} +#endif // (!C4CORE_HAVE_STD_FROMCHARS) && (!C4CORE_HAVE_FAST_FLOAT) + + +#if C4CORE_HAVE_STD_TOCHARS +template +C4_ALWAYS_INLINE size_t rtoa(substr buf, T v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept +{ + std::to_chars_result result; + size_t pos = 0; + if(formatting == FTOA_HEXA) + { + if(buf.len > size_t(2)) + { + buf.str[0] = '0'; + buf.str[1] = 'x'; + } + pos += size_t(2); + } + if(precision == -1) + result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting); + else + result = std::to_chars(buf.str + pos, buf.str + buf.len, v, (std::chars_format)formatting, precision); + if(result.ec == std::errc()) + { + // all good, no errors. 
+ C4_ASSERT(result.ptr >= buf.str); + ptrdiff_t delta = result.ptr - buf.str; + return static_cast(delta); + } + C4_ASSERT(result.ec == std::errc::value_too_large); + // This is unfortunate. + // + // When the result can't fit in the given buffer, + // std::to_chars() returns the end pointer it was originally + // given, which is useless because here we would like to know + // _exactly_ how many characters the buffer must have to fit + // the result. + // + // So we take the pessimistic view, and assume as many digits + // as could ever be required: + size_t ret = static_cast(std::numeric_limits::max_digits10); + return ret > buf.len ? ret : buf.len + 1; +} +#endif // C4CORE_HAVE_STD_TOCHARS + + +#if C4CORE_HAVE_FAST_FLOAT +template +C4_ALWAYS_INLINE bool scan_rhex(csubstr s, T *C4_RESTRICT val) noexcept +{ + C4_ASSERT(s.len > 0); + C4_ASSERT(s.str[0] != '-'); + C4_ASSERT(s.str[0] != '+'); + C4_ASSERT(!s.begins_with("0x")); + C4_ASSERT(!s.begins_with("0X")); + size_t pos = 0; + // integer part + for( ; pos < s.len; ++pos) + { + const char c = s.str[pos]; + if(c >= '0' && c <= '9') + *val = *val * T(16) + T(c - '0'); + else if(c >= 'a' && c <= 'f') + *val = *val * T(16) + T(c - 'a'); + else if(c >= 'A' && c <= 'F') + *val = *val * T(16) + T(c - 'A'); + else if(c == '.') + { + ++pos; + break; // follow on to mantissa + } + else if(c == 'p' || c == 'P') + { + ++pos; + goto power; // no mantissa given, jump to power + } + else + { + return false; + } + } + // mantissa + { + // 0.0625 == 1/16 == value of first digit after the comma + for(T digit = T(0.0625); pos < s.len; ++pos, digit /= T(16)) + { + const char c = s.str[pos]; + if(c >= '0' && c <= '9') + *val += digit * T(c - '0'); + else if(c >= 'a' && c <= 'f') + *val += digit * T(c - 'a'); + else if(c >= 'A' && c <= 'F') + *val += digit * T(c - 'A'); + else if(c == 'p' || c == 'P') + { + ++pos; + goto power; // mantissa finished, jump to power + } + else + { + return false; + } + } + } + return true; +power: + if(C4_LIKELY(pos < s.len)) + { + if(s.str[pos] == '+') // atoi() cannot handle a leading '+' + ++pos; + if(C4_LIKELY(pos < s.len)) + { + int16_t powval = {}; + if(C4_LIKELY(atoi(s.sub(pos), &powval))) + { + *val *= ipow(powval); + return true; + } + } + } + return false; +} +#endif + +} // namespace detail + + +#undef _c4appendhex +#undef _c4append + + +/** Convert a single-precision real number to string. The string will + * in general be NOT null-terminated. For FTOA_FLEX, \p precision is + * the number of significand digits. Otherwise \p precision is the + * number of decimals. It is safe to call this function with an empty + * or too-small buffer. + * + * @return the size of the buffer needed to write the number + */ +C4_ALWAYS_INLINE size_t ftoa(substr str, float v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept +{ +#if C4CORE_HAVE_STD_TOCHARS + return detail::rtoa(str, v, precision, formatting); +#else + char fmt[16]; + detail::get_real_format_str(fmt, precision, formatting, /*length_modifier*/""); + return detail::print_one(str, fmt, v); +#endif +} + + +/** Convert a double-precision real number to string. The string will + * in general be NOT null-terminated. For FTOA_FLEX, \p precision is + * the number of significand digits. Otherwise \p precision is the + * number of decimals. It is safe to call this function with an empty + * or too-small buffer. 
+ * + * @return the size of the buffer needed to write the number + */ +C4_ALWAYS_INLINE size_t dtoa(substr str, double v, int precision=-1, RealFormat_e formatting=FTOA_FLEX) noexcept +{ +#if C4CORE_HAVE_STD_TOCHARS + return detail::rtoa(str, v, precision, formatting); +#else + char fmt[16]; + detail::get_real_format_str(fmt, precision, formatting, /*length_modifier*/"l"); + return detail::print_one(str, fmt, v); +#endif +} + + +/** Convert a string to a single precision real number. + * The input string must be trimmed to the value, ie + * no leading or trailing whitespace can be present. + * @return true iff the conversion succeeded + * @see atof_first() if the string is not trimmed + */ +C4_ALWAYS_INLINE bool atof(csubstr str, float * C4_RESTRICT v) noexcept +{ + C4_ASSERT(str.len > 0); + C4_ASSERT(str.triml(" \r\t\n").len == str.len); +#if C4CORE_HAVE_FAST_FLOAT + // fastfloat cannot parse hexadecimal floats + bool isneg = (str.str[0] == '-'); + csubstr rem = str.sub(isneg || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + { + fast_float::from_chars_result result; + result = fast_float::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); + } + else if(detail::scan_rhex(rem.sub(2), v)) + { + *v *= isneg ? -1.f : 1.f; + return true; + } + return false; +#elif C4CORE_HAVE_STD_FROMCHARS + std::from_chars_result result; + result = std::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); +#else + csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + return detail::scan_one(str, "f", v) != csubstr::npos; + else + return detail::scan_one(str, "a", v) != csubstr::npos; +#endif +} + + +/** Convert a string to a double precision real number. + * The input string must be trimmed to the value, ie + * no leading or trailing whitespace can be present. + * @return true iff the conversion succeeded + * @see atod_first() if the string is not trimmed + */ +C4_ALWAYS_INLINE bool atod(csubstr str, double * C4_RESTRICT v) noexcept +{ + C4_ASSERT(str.triml(" \r\t\n").len == str.len); +#if C4CORE_HAVE_FAST_FLOAT + // fastfloat cannot parse hexadecimal floats + bool isneg = (str.str[0] == '-'); + csubstr rem = str.sub(isneg || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + { + fast_float::from_chars_result result; + result = fast_float::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); + } + else if(detail::scan_rhex(rem.sub(2), v)) + { + *v *= isneg ? -1. : 1.; + return true; + } + return false; +#elif C4CORE_HAVE_STD_FROMCHARS + std::from_chars_result result; + result = std::from_chars(str.str, str.str + str.len, *v); + return result.ec == std::errc(); +#else + csubstr rem = str.sub(str.str[0] == '-' || str.str[0] == '+'); + if(!(rem.len >= 2 && (rem.str[0] == '0' && (rem.str[1] == 'x' || rem.str[1] == 'X')))) + return detail::scan_one(str, "lf", v) != csubstr::npos; + else + return detail::scan_one(str, "la", v) != csubstr::npos; +#endif +} + + +/** Convert a string to a single precision real number. + * Leading whitespace is skipped until valid characters are found. 
+ * @return the number of characters read from the string, or npos if + * conversion was not successful or if the string was empty */ +inline size_t atof_first(csubstr str, float * C4_RESTRICT v) noexcept +{ + csubstr trimmed = str.first_real_span(); + if(trimmed.len == 0) + return csubstr::npos; + if(atof(trimmed, v)) + return static_cast(trimmed.end() - str.begin()); + return csubstr::npos; +} + + +/** Convert a string to a double precision real number. + * Leading whitespace is skipped until valid characters are found. + * @return the number of characters read from the string, or npos if + * conversion was not successful or if the string was empty */ +inline size_t atod_first(csubstr str, double * C4_RESTRICT v) noexcept +{ + csubstr trimmed = str.first_real_span(); + if(trimmed.len == 0) + return csubstr::npos; + if(atod(trimmed, v)) + return static_cast(trimmed.end() - str.begin()); + return csubstr::npos; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// generic versions + +C4_ALWAYS_INLINE size_t xtoa(substr s, uint8_t v) noexcept { return write_dec(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v) noexcept { return write_dec(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v) noexcept { return write_dec(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v) noexcept { return write_dec(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int8_t v) noexcept { return itoa(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int16_t v) noexcept { return itoa(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int32_t v) noexcept { return itoa(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int64_t v) noexcept { return itoa(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, float v) noexcept { return ftoa(s, v); } +C4_ALWAYS_INLINE size_t xtoa(substr s, double v) noexcept { return dtoa(s, v); } + +C4_ALWAYS_INLINE size_t xtoa(substr s, uint8_t v, uint8_t radix) noexcept { return utoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v, uint16_t radix) noexcept { return utoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v, uint32_t radix) noexcept { return utoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v, uint64_t radix) noexcept { return utoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int8_t v, int8_t radix) noexcept { return itoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int16_t v, int16_t radix) noexcept { return itoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int32_t v, int32_t radix) noexcept { return itoa(s, v, radix); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int64_t v, int64_t radix) noexcept { return itoa(s, v, radix); } + +C4_ALWAYS_INLINE size_t xtoa(substr s, uint8_t v, uint8_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint16_t v, uint16_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint32_t v, uint32_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, uint64_t v, uint64_t radix, size_t num_digits) noexcept { return utoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int8_t v, int8_t radix, size_t num_digits) noexcept { return itoa(s, 
v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int16_t v, int16_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int32_t v, int32_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); } +C4_ALWAYS_INLINE size_t xtoa(substr s, int64_t v, int64_t radix, size_t num_digits) noexcept { return itoa(s, v, radix, num_digits); } + +C4_ALWAYS_INLINE size_t xtoa(substr s, float v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return ftoa(s, v, precision, formatting); } +C4_ALWAYS_INLINE size_t xtoa(substr s, double v, int precision, RealFormat_e formatting=FTOA_FLEX) noexcept { return dtoa(s, v, precision, formatting); } + +C4_ALWAYS_INLINE bool atox(csubstr s, uint8_t *C4_RESTRICT v) noexcept { return atou(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, uint16_t *C4_RESTRICT v) noexcept { return atou(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, uint32_t *C4_RESTRICT v) noexcept { return atou(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, uint64_t *C4_RESTRICT v) noexcept { return atou(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, int8_t *C4_RESTRICT v) noexcept { return atoi(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, int16_t *C4_RESTRICT v) noexcept { return atoi(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, int32_t *C4_RESTRICT v) noexcept { return atoi(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, int64_t *C4_RESTRICT v) noexcept { return atoi(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, float *C4_RESTRICT v) noexcept { return atof(s, v); } +C4_ALWAYS_INLINE bool atox(csubstr s, double *C4_RESTRICT v) noexcept { return atod(s, v); } + +C4_ALWAYS_INLINE size_t to_chars(substr buf, uint8_t v) noexcept { return write_dec(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, uint16_t v) noexcept { return write_dec(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, uint32_t v) noexcept { return write_dec(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, uint64_t v) noexcept { return write_dec(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, int8_t v) noexcept { return itoa(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, int16_t v) noexcept { return itoa(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, int32_t v) noexcept { return itoa(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, int64_t v) noexcept { return itoa(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, float v) noexcept { return ftoa(buf, v); } +C4_ALWAYS_INLINE size_t to_chars(substr buf, double v) noexcept { return dtoa(buf, v); } + +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint8_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int8_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int16_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int32_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, int64_t *C4_RESTRICT v) noexcept { return atoi(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, float *C4_RESTRICT v) noexcept { 
return atof(buf, v); } +C4_ALWAYS_INLINE bool from_chars(csubstr buf, double *C4_RESTRICT v) noexcept { return atod(buf, v); } + +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint8_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint16_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint32_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, uint64_t *C4_RESTRICT v) noexcept { return atou_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, int8_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, int16_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, int32_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, int64_t *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, float *C4_RESTRICT v) noexcept { return atof_first(buf, v); } +C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, double *C4_RESTRICT v) noexcept { return atod_first(buf, v); } + + +//----------------------------------------------------------------------------- +// on some platforms, (unsigned) int and (unsigned) long +// are not any of the fixed length types above + +#define _C4_IF_NOT_FIXED_LENGTH_I(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_i, ty> +#define _C4_IF_NOT_FIXED_LENGTH_U(T, ty) C4_ALWAYS_INLINE typename std::enable_if::value && !is_fixed_length::value_u, ty> + +template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type xtoa(substr buf, T v) noexcept { return itoa(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type xtoa(substr buf, T v) noexcept { return write_dec(buf, v); } + +template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type atox(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } + +template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type to_chars(substr buf, T v) noexcept { return itoa(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type to_chars(substr buf, T v) noexcept { return write_dec(buf, v); } + +template _C4_IF_NOT_FIXED_LENGTH_I(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, bool )::type from_chars(csubstr buf, T *C4_RESTRICT v) noexcept { return atou(buf, v); } + +template _C4_IF_NOT_FIXED_LENGTH_I(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atoi_first(buf, v); } +template _C4_IF_NOT_FIXED_LENGTH_U(T, size_t)::type from_chars_first(csubstr buf, T *C4_RESTRICT v) noexcept { return atou_first(buf, v); } + +#undef _C4_IF_NOT_FIXED_LENGTH_I +#undef _C4_IF_NOT_FIXED_LENGTH_U + + +//----------------------------------------------------------------------------- +// for pointers + +template C4_ALWAYS_INLINE size_t xtoa(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); } +template C4_ALWAYS_INLINE bool atox(csubstr s, T **v) noexcept { intptr_t tmp; bool ret = atox(s, &tmp); if(ret) { *v = (T*)tmp; } return ret; } +template C4_ALWAYS_INLINE size_t to_chars(substr s, T *v) noexcept { return itoa(s, (intptr_t)v, (intptr_t)16); } 
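+// Sketch of the pointer round-trip defined in this block: the to_chars() overload
+// above serializes a pointer as a hexadecimal intptr_t, and the from_chars()
+// overload below parses such a string back into a pointer. The buffer size and
+// printed address below are illustrative only:
+//
+//     char raw[32];
+//     int x = 0;
+//     size_t len = c4::to_chars(c4::substr(raw, sizeof(raw)), &x); // eg "0x7ffc51e0a94c"
+//     int *p = nullptr;
+//     bool ok = c4::from_chars(c4::csubstr(raw, len), &p);         // on success, p == &x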
+template C4_ALWAYS_INLINE bool from_chars(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } +template C4_ALWAYS_INLINE size_t from_chars_first(csubstr buf, T **v) noexcept { intptr_t tmp; bool ret = from_chars_first(buf, &tmp); if(ret) { *v = (T*)tmp; } return ret; } + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** call to_chars() and return a substr consisting of the + * written portion of the input buffer. Ie, same as to_chars(), + * but return a substr instead of a size_t. + * + * @see to_chars() */ +template +C4_ALWAYS_INLINE substr to_chars_sub(substr buf, T const& C4_RESTRICT v) noexcept +{ + size_t sz = to_chars(buf, v); + return buf.left_of(sz <= buf.len ? sz : buf.len); +} + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// bool implementation + +C4_ALWAYS_INLINE size_t to_chars(substr buf, bool v) noexcept +{ + int val = v; + return to_chars(buf, val); +} + +inline bool from_chars(csubstr buf, bool * C4_RESTRICT v) noexcept +{ + if(buf == '0') + { + *v = false; return true; + } + else if(buf == '1') + { + *v = true; return true; + } + else if(buf == "false") + { + *v = false; return true; + } + else if(buf == "true") + { + *v = true; return true; + } + else if(buf == "False") + { + *v = false; return true; + } + else if(buf == "True") + { + *v = true; return true; + } + else if(buf == "FALSE") + { + *v = false; return true; + } + else if(buf == "TRUE") + { + *v = true; return true; + } + // fallback to c-style int bools + int val = 0; + bool ret = from_chars(buf, &val); + if(C4_LIKELY(ret)) + { + *v = (val != 0); + } + return ret; +} + +inline size_t from_chars_first(csubstr buf, bool * C4_RESTRICT v) noexcept +{ + csubstr trimmed = buf.first_non_empty_span(); + if(trimmed.len == 0 || !from_chars(buf, v)) + return csubstr::npos; + return trimmed.len; +} + + +//----------------------------------------------------------------------------- +// single-char implementation + +inline size_t to_chars(substr buf, char v) noexcept +{ + if(buf.len > 0) + buf[0] = v; + return 1; +} + +/** extract a single character from a substring + * @note to extract a string instead and not just a single character, use the csubstr overload */ +inline bool from_chars(csubstr buf, char * C4_RESTRICT v) noexcept +{ + if(buf.len != 1) + return false; + *v = buf[0]; + return true; +} + +inline size_t from_chars_first(csubstr buf, char * C4_RESTRICT v) noexcept +{ + if(buf.len < 1) + return csubstr::npos; + *v = buf[0]; + return 1; +} + + +//----------------------------------------------------------------------------- +// csubstr implementation + +inline size_t to_chars(substr buf, csubstr v) noexcept +{ + C4_ASSERT(!buf.overlaps(v)); + size_t len = buf.len < v.len ? buf.len : v.len; + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v.str != nullptr); + memcpy(buf.str, v.str, len); + } + return v.len; +} + +inline bool from_chars(csubstr buf, csubstr *C4_RESTRICT v) noexcept +{ + *v = buf; + return true; +} + +inline size_t from_chars_first(substr buf, csubstr * C4_RESTRICT v) noexcept +{ + csubstr trimmed = buf.first_non_empty_span(); + if(trimmed.len == 0) + return csubstr::npos; + *v = trimmed; + return static_cast(trimmed.end() - buf.begin()); +} + + +//----------------------------------------------------------------------------- +// substr + +inline size_t to_chars(substr buf, substr v) noexcept +{ + C4_ASSERT(!buf.overlaps(v)); + size_t len = buf.len < v.len ? buf.len : v.len; + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v.str != nullptr); + memcpy(buf.str, v.str, len); + } + return v.len; +} + +inline bool from_chars(csubstr buf, substr * C4_RESTRICT v) noexcept +{ + C4_ASSERT(!buf.overlaps(*v)); + // is the destination buffer wide enough? + if(v->len >= buf.len) + { + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v->str != nullptr); + memcpy(v->str, buf.str, buf.len); + } + v->len = buf.len; + return true; + } + return false; +} + +inline size_t from_chars_first(csubstr buf, substr * C4_RESTRICT v) noexcept +{ + csubstr trimmed = buf.first_non_empty_span(); + C4_ASSERT(!trimmed.overlaps(*v)); + if(C4_UNLIKELY(trimmed.len == 0)) + return csubstr::npos; + size_t len = trimmed.len > v->len ? v->len : trimmed.len; + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(buf.str != nullptr); + C4_ASSERT(v->str != nullptr); + memcpy(v->str, trimmed.str, len); + } + if(C4_UNLIKELY(trimmed.len > v->len)) + return csubstr::npos; + return static_cast(trimmed.end() - buf.begin()); +} + + +//----------------------------------------------------------------------------- + +template +inline size_t to_chars(substr buf, const char (& C4_RESTRICT v)[N]) noexcept +{ + csubstr sp(v); + return to_chars(buf, sp); +} + +inline size_t to_chars(substr buf, const char * C4_RESTRICT v) noexcept +{ + return to_chars(buf, to_csubstr(v)); +} + +} // namespace c4 + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_CHARCONV_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/charconv.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/utf.hpp +// https://github.com/biojppm/c4core/src/c4/utf.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_UTF_HPP_ +#define C4_UTF_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr_fwd.hpp +//#include "c4/substr_fwd.hpp" +#if !defined(C4_SUBSTR_FWD_HPP_) && !defined(_C4_SUBSTR_FWD_HPP_) +#error "amalgamate: file c4/substr_fwd.hpp must have been included at this point" +#endif /* C4_SUBSTR_FWD_HPP_ */ + +//included above: +//#include +//included above: +//#include + +namespace c4 { + +substr decode_code_point(substr out, csubstr code_point); +size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t code); + +} // namespace c4 + +#endif // C4_UTF_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/utf.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/format.hpp +// https://github.com/biojppm/c4core/src/c4/format.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_FORMAT_HPP_ +#define _C4_FORMAT_HPP_ + +/** @file format.hpp provides type-safe facilities for formatting arguments + * to string buffers */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/charconv.hpp +//#include "c4/charconv.hpp" +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/blob.hpp +//#include "c4/blob.hpp" +#if !defined(C4_BLOB_HPP_) && !defined(_C4_BLOB_HPP_) +#error "amalgamate: file c4/blob.hpp must have been included at this point" +#endif /* C4_BLOB_HPP_ */ + + + +#ifdef _MSC_VER +# pragma 
warning(push) +# if C4_MSVC_VERSION != C4_MSVC_VERSION_2017 +# pragma warning(disable: 4800) // forcing value to bool 'true' or 'false' (performance warning) +# endif +# pragma warning(disable: 4996) // snprintf/scanf: this function or variable may be unsafe +#elif defined(__clang__) +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wuseless-cast" +#endif + +namespace c4 { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// formatting truthy types as booleans + +namespace fmt { + +/** write a variable as an alphabetic boolean, ie as either true or false + * @param strict_read */ +template +struct boolalpha_ +{ + boolalpha_(T val_, bool strict_read_=false) : val(val_ ? true : false), strict_read(strict_read_) {} + bool val; + bool strict_read; +}; + +template +boolalpha_ boolalpha(T const& val, bool strict_read=false) +{ + return boolalpha_(val, strict_read); +} + +} // namespace fmt + +/** write a variable as an alphabetic boolean, ie as either true or false */ +template +inline size_t to_chars(substr buf, fmt::boolalpha_ fmt) +{ + return to_chars(buf, fmt.val ? "true" : "false"); +} + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// formatting integral types + +namespace fmt { + +/** format an integral type with a custom radix */ +template +struct integral_ +{ + T val; + T radix; + C4_ALWAYS_INLINE integral_(T val_, T radix_) : val(val_), radix(radix_) {} +}; + +/** format an integral type with a custom radix, and pad with zeroes on the left */ +template +struct integral_padded_ +{ + T val; + T radix; + size_t num_digits; + C4_ALWAYS_INLINE integral_padded_(T val_, T radix_, size_t nd) : val(val_), radix(radix_), num_digits(nd) {} +}; + +/** format an integral type with a custom radix */ +template +C4_ALWAYS_INLINE integral_ integral(T val, T radix=10) +{ + return integral_(val, radix); +} +/** format an integral type with a custom radix */ +template +C4_ALWAYS_INLINE integral_ integral(T const* val, T radix=10) +{ + return integral_(reinterpret_cast(val), static_cast(radix)); +} +/** format an integral type with a custom radix */ +template +C4_ALWAYS_INLINE integral_ integral(std::nullptr_t, T radix=10) +{ + return integral_(intptr_t(0), static_cast(radix)); +} +/** pad the argument with zeroes on the left, with decimal radix */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(T val, size_t num_digits) +{ + return integral_padded_(val, T(10), num_digits); +} +/** pad the argument with zeroes on the left */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(integral_ val, size_t num_digits) +{ + return integral_padded_(val.val, val.radix, num_digits); +} +/** pad the argument with zeroes on the left */ +C4_ALWAYS_INLINE integral_padded_ zpad(std::nullptr_t, size_t num_digits) +{ + return integral_padded_(0, 16, num_digits); +} +/** pad the argument with zeroes on the left */ +template +C4_ALWAYS_INLINE integral_padded_ zpad(T const* val, size_t num_digits) +{ + return integral_padded_(reinterpret_cast(val), 16, num_digits); +} +template +C4_ALWAYS_INLINE integral_padded_ zpad(T * val, size_t num_digits) +{ + return 
integral_padded_(reinterpret_cast(val), 16, num_digits); +} + + +/** format the pointer as an hexadecimal value */ +template +inline integral_ hex(T * v) +{ + return integral_(reinterpret_cast(v), intptr_t(16)); +} +/** format the pointer as an hexadecimal value */ +template +inline integral_ hex(T const* v) +{ + return integral_(reinterpret_cast(v), intptr_t(16)); +} +/** format null as an hexadecimal value + * @overload hex */ +inline integral_ hex(std::nullptr_t) +{ + return integral_(0, intptr_t(16)); +} +/** format the integral_ argument as an hexadecimal value + * @overload hex */ +template +inline integral_ hex(T v) +{ + return integral_(v, T(16)); +} + +/** format the pointer as an octal value */ +template +inline integral_ oct(T const* v) +{ + return integral_(reinterpret_cast(v), intptr_t(8)); +} +/** format the pointer as an octal value */ +template +inline integral_ oct(T * v) +{ + return integral_(reinterpret_cast(v), intptr_t(8)); +} +/** format null as an octal value */ +inline integral_ oct(std::nullptr_t) +{ + return integral_(intptr_t(0), intptr_t(8)); +} +/** format the integral_ argument as an octal value */ +template +inline integral_ oct(T v) +{ + return integral_(v, T(8)); +} + +/** format the pointer as a binary 0-1 value + * @see c4::raw() if you want to use a binary memcpy instead of 0-1 formatting */ +template +inline integral_ bin(T const* v) +{ + return integral_(reinterpret_cast(v), intptr_t(2)); +} +/** format the pointer as a binary 0-1 value + * @see c4::raw() if you want to use a binary memcpy instead of 0-1 formatting */ +template +inline integral_ bin(T * v) +{ + return integral_(reinterpret_cast(v), intptr_t(2)); +} +/** format null as a binary 0-1 value + * @see c4::raw() if you want to use a binary memcpy instead of 0-1 formatting */ +inline integral_ bin(std::nullptr_t) +{ + return integral_(intptr_t(0), intptr_t(2)); +} +/** format the integral_ argument as a binary 0-1 value + * @see c4::raw() if you want to use a raw memcpy-based binary dump instead of 0-1 formatting */ +template +inline integral_ bin(T v) +{ + return integral_(v, T(2)); +} + + +template +struct overflow_checked_ +{ + static_assert(std::is_integral::value, "range checking only for integral types"); + C4_ALWAYS_INLINE overflow_checked_(T &val_) : val(&val_) {} + T *val; +}; +template +C4_ALWAYS_INLINE overflow_checked_ overflow_checked(T &val) +{ + return overflow_checked_(val); +} + +} // namespace fmt + +/** format an integral_ signed type */ +template +C4_ALWAYS_INLINE +typename std::enable_if::value, size_t>::type +to_chars(substr buf, fmt::integral_ fmt) +{ + return itoa(buf, fmt.val, fmt.radix); +} +/** format an integral_ signed type, pad with zeroes */ +template +C4_ALWAYS_INLINE +typename std::enable_if::value, size_t>::type +to_chars(substr buf, fmt::integral_padded_ fmt) +{ + return itoa(buf, fmt.val, fmt.radix, fmt.num_digits); +} + +/** format an integral_ unsigned type */ +template +C4_ALWAYS_INLINE +typename std::enable_if::value, size_t>::type +to_chars(substr buf, fmt::integral_ fmt) +{ + return utoa(buf, fmt.val, fmt.radix); +} +/** format an integral_ unsigned type, pad with zeroes */ +template +C4_ALWAYS_INLINE +typename std::enable_if::value, size_t>::type +to_chars(substr buf, fmt::integral_padded_ fmt) +{ + return utoa(buf, fmt.val, fmt.radix, fmt.num_digits); +} + +template +C4_ALWAYS_INLINE bool from_chars(csubstr s, fmt::overflow_checked_ wrapper) +{ + if(C4_LIKELY(!overflows(s))) + return atox(s, wrapper.val); + return false; +} + + 
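+// A short usage sketch for the integral formatting helpers defined above; the
+// buffer size and values are illustrative only:
+//
+//     char raw[32];
+//     c4::substr buf(raw, sizeof(raw));
+//     c4::to_chars(buf, c4::fmt::hex(255));                  // writes "0xff"
+//     c4::to_chars(buf, c4::fmt::zpad(42, 5));               // writes "00042" (decimal, zero-padded)
+//     c4::to_chars(buf, c4::fmt::zpad(c4::fmt::bin(5), 4));  // writes "0b0101"
+//     uint8_t small = 0;
+//     bool ok = c4::from_chars(c4::csubstr("300"), c4::fmt::overflow_checked(small)); // false: 300 > 255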
+//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// formatting real types + +namespace fmt { + +template +struct real_ +{ + T val; + int precision; + RealFormat_e fmt; + real_(T v, int prec=-1, RealFormat_e f=FTOA_FLOAT) : val(v), precision(prec), fmt(f) {} +}; + +template +real_ real(T val, int precision, RealFormat_e fmt=FTOA_FLOAT) +{ + return real_(val, precision, fmt); +} + +} // namespace fmt + +inline size_t to_chars(substr buf, fmt::real_< float> fmt) { return ftoa(buf, fmt.val, fmt.precision, fmt.fmt); } +inline size_t to_chars(substr buf, fmt::real_ fmt) { return dtoa(buf, fmt.val, fmt.precision, fmt.fmt); } + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// writing raw binary data + +namespace fmt { + +/** @see blob_ */ +template +struct raw_wrapper_ : public blob_ +{ + size_t alignment; + + C4_ALWAYS_INLINE raw_wrapper_(blob_ data, size_t alignment_) noexcept + : + blob_(data), + alignment(alignment_) + { + C4_ASSERT_MSG(alignment > 0 && (alignment & (alignment - 1)) == 0, "alignment must be a power of two"); + } +}; + +using const_raw_wrapper = raw_wrapper_; +using raw_wrapper = raw_wrapper_; + +/** mark a variable to be written in raw binary format, using memcpy + * @see blob_ */ +inline const_raw_wrapper craw(cblob data, size_t alignment=alignof(max_align_t)) +{ + return const_raw_wrapper(data, alignment); +} +/** mark a variable to be written in raw binary format, using memcpy + * @see blob_ */ +inline const_raw_wrapper raw(cblob data, size_t alignment=alignof(max_align_t)) +{ + return const_raw_wrapper(data, alignment); +} +/** mark a variable to be written in raw binary format, using memcpy + * @see blob_ */ +template +inline const_raw_wrapper craw(T const& C4_RESTRICT data, size_t alignment=alignof(T)) +{ + return const_raw_wrapper(cblob(data), alignment); +} +/** mark a variable to be written in raw binary format, using memcpy + * @see blob_ */ +template +inline const_raw_wrapper raw(T const& C4_RESTRICT data, size_t alignment=alignof(T)) +{ + return const_raw_wrapper(cblob(data), alignment); +} + +/** mark a variable to be read in raw binary format, using memcpy */ +inline raw_wrapper raw(blob data, size_t alignment=alignof(max_align_t)) +{ + return raw_wrapper(data, alignment); +} +/** mark a variable to be read in raw binary format, using memcpy */ +template +inline raw_wrapper raw(T & C4_RESTRICT data, size_t alignment=alignof(T)) +{ + return raw_wrapper(blob(data), alignment); +} + +} // namespace fmt + + +/** write a variable in raw binary format, using memcpy */ +C4CORE_EXPORT size_t to_chars(substr buf, fmt::const_raw_wrapper r); + +/** read a variable in raw binary format, using memcpy */ +C4CORE_EXPORT bool from_chars(csubstr buf, fmt::raw_wrapper *r); +/** read a variable in raw binary format, using memcpy */ +inline bool from_chars(csubstr buf, fmt::raw_wrapper r) +{ + return from_chars(buf, &r); +} + +/** read a variable in raw binary format, using memcpy */ +inline size_t from_chars_first(csubstr buf, fmt::raw_wrapper *r) +{ + return from_chars(buf, r); +} +/** read a variable in raw binary format, using memcpy */ +inline size_t from_chars_first(csubstr buf, 
fmt::raw_wrapper r) +{ + return from_chars(buf, &r); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +// formatting aligned to left/right + +namespace fmt { + +template +struct left_ +{ + T val; + size_t width; + char pad; + left_(T v, size_t w, char p) : val(v), width(w), pad(p) {} +}; + +template +struct right_ +{ + T val; + size_t width; + char pad; + right_(T v, size_t w, char p) : val(v), width(w), pad(p) {} +}; + +/** mark an argument to be aligned left */ +template +left_ left(T val, size_t width, char padchar=' ') +{ + return left_(val, width, padchar); +} + +/** mark an argument to be aligned right */ +template +right_ right(T val, size_t width, char padchar=' ') +{ + return right_(val, width, padchar); +} + +} // namespace fmt + + +template +size_t to_chars(substr buf, fmt::left_ const& C4_RESTRICT align) +{ + size_t ret = to_chars(buf, align.val); + if(ret >= buf.len || ret >= align.width) + return ret > align.width ? ret : align.width; + buf.first(align.width).sub(ret).fill(align.pad); + to_chars(buf, align.val); + return align.width; +} + +template +size_t to_chars(substr buf, fmt::right_ const& C4_RESTRICT align) +{ + size_t ret = to_chars(buf, align.val); + if(ret >= buf.len || ret >= align.width) + return ret > align.width ? ret : align.width; + size_t rem = static_cast(align.width - ret); + buf.first(rem).fill(align.pad); + to_chars(buf.sub(rem), align.val); + return align.width; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the variadic recursion +inline size_t cat(substr /*buf*/) +{ + return 0; +} +/// @endcond + + +/** serialize the arguments, concatenating them to the given fixed-size buffer. + * The buffer size is strictly respected: no writes will occur beyond its end. + * @return the number of characters needed to write all the arguments into the buffer. + * @see c4::catrs() if instead of a fixed-size buffer, a resizeable container is desired + * @see c4::uncat() for the inverse function + * @see c4::catsep() if a separator between each argument is to be used + * @see c4::format() if a format string is desired */ +template +size_t cat(substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t num = to_chars(buf, a); + buf = buf.len >= num ? buf.sub(num) : substr{}; + num += cat(buf, more...); + return num; +} + +/** like c4::cat() but return a substr instead of a size */ +template +substr cat_sub(substr buf, Args && ...args) +{ + size_t sz = cat(buf, std::forward(args)...); + C4_CHECK(sz <= buf.len); + return {buf.str, sz <= buf.len ? sz : buf.len}; +} + + +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the variadic recursion +inline size_t uncat(csubstr /*buf*/) +{ + return 0; +} +/// @endcond + + +/** deserialize the arguments from the given buffer. + * + * @return the number of characters read from the buffer, or csubstr::npos + * if a conversion was not successful. + * @see c4::cat(). c4::uncat() is the inverse of c4::cat(). 
*/ +template +size_t uncat(csubstr buf, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) +{ + size_t out = from_chars_first(buf, &a); + if(C4_UNLIKELY(out == csubstr::npos)) + return csubstr::npos; + buf = buf.len >= out ? buf.sub(out) : substr{}; + size_t num = uncat(buf, more...); + if(C4_UNLIKELY(num == csubstr::npos)) + return csubstr::npos; + return out + num; +} + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + +template +inline size_t catsep_more(substr /*buf*/, Sep const& C4_RESTRICT /*sep*/) +{ + return 0; +} + +template +size_t catsep_more(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t ret = to_chars(buf, sep), num = ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = to_chars(buf, a); + num += ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = catsep_more(buf, sep, more...); + num += ret; + return num; +} + +template +inline size_t uncatsep_more(csubstr /*buf*/, Sep & /*sep*/) +{ + return 0; +} + +template +size_t uncatsep_more(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) +{ + size_t ret = from_chars_first(buf, &sep), num = ret; + if(C4_UNLIKELY(ret == csubstr::npos)) + return csubstr::npos; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = from_chars_first(buf, &a); + if(C4_UNLIKELY(ret == csubstr::npos)) + return csubstr::npos; + num += ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = uncatsep_more(buf, sep, more...); + if(C4_UNLIKELY(ret == csubstr::npos)) + return csubstr::npos; + num += ret; + return num; +} + +} // namespace detail + + +/** serialize the arguments, concatenating them to the given fixed-size + * buffer, using a separator between each argument. + * The buffer size is strictly respected: no writes will occur beyond its end. + * @return the number of characters needed to write all the arguments into the buffer. + * @see c4::catseprs() if instead of a fixed-size buffer, a resizeable container is desired + * @see c4::uncatsep() for the inverse function (ie, reading instead of writing) + * @see c4::cat() if no separator is needed + * @see c4::format() if a format string is desired */ +template +size_t catsep(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t num = to_chars(buf, a); + buf = buf.len >= num ? buf.sub(num) : substr{}; + num += detail::catsep_more(buf, sep, more...); + return num; +} + +/** like c4::catsep() but return a substr instead of a size + * @see c4::catsep(). c4::uncatsep() is the inverse of c4::catsep(). */ +template +substr catsep_sub(substr buf, Args && ...args) +{ + size_t sz = catsep(buf, std::forward(args)...); + C4_CHECK(sz <= buf.len); + return {buf.str, sz <= buf.len ? sz : buf.len}; +} + +/** deserialize the arguments from the given buffer, using a separator. + * + * @return the number of characters read from the buffer, or csubstr::npos + * if a conversion was not successful + * @see c4::catsep(). c4::uncatsep() is the inverse of c4::catsep(). */ +template +size_t uncatsep(csubstr buf, Sep & C4_RESTRICT sep, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) +{ + size_t ret = from_chars_first(buf, &a), num = ret; + if(C4_UNLIKELY(ret == csubstr::npos)) + return csubstr::npos; + buf = buf.len >= ret ? 
buf.sub(ret) : substr{}; + ret = detail::uncatsep_more(buf, sep, more...); + if(C4_UNLIKELY(ret == csubstr::npos)) + return csubstr::npos; + num += ret; + return num; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the variadic recursion +inline size_t format(substr buf, csubstr fmt) +{ + return to_chars(buf, fmt); +} +/// @endcond + + +/** using a format string, serialize the arguments into the given + * fixed-size buffer. + * The buffer size is strictly respected: no writes will occur beyond its end. + * In the format string, each argument is marked with a compact + * curly-bracket pair: {}. Arguments beyond the last curly bracket pair + * are silently ignored. For example: + * @code{.cpp} + * c4::format(buf, "the {} drank {} {}", "partier", 5, "beers"); // the partier drank 5 beers + * c4::format(buf, "the {} drank {} {}", "programmer", 6, "coffees"); // the programmer drank 6 coffees + * @endcode + * @return the number of characters needed to write into the buffer. + * @see c4::formatrs() if instead of a fixed-size buffer, a resizeable container is desired + * @see c4::unformat() for the inverse function + * @see c4::cat() if no format or separator is needed + * @see c4::catsep() if no format is needed, but a separator must be used */ +template +size_t format(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t pos = fmt.find("{}"); // @todo use _find_fmt() + if(C4_UNLIKELY(pos == csubstr::npos)) + return to_chars(buf, fmt); + size_t num = to_chars(buf, fmt.sub(0, pos)); + size_t out = num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = to_chars(buf, a); + out += num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = format(buf, fmt.sub(pos + 2), more...); + out += num; + return out; +} + +/** like c4::format() but return a substr instead of a size + * @see c4::format() + * @see c4::catsep(). uncatsep() is the inverse of catsep(). */ +template +substr format_sub(substr buf, csubstr fmt, Args const& C4_RESTRICT ...args) +{ + size_t sz = c4::format(buf, fmt, args...); + C4_CHECK(sz <= buf.len); + return {buf.str, sz <= buf.len ? sz : buf.len}; +} + + +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the variadic recursion +inline size_t unformat(csubstr /*buf*/, csubstr fmt) +{ + return fmt.len; +} +/// @endcond + + +/** using a format string, deserialize the arguments from the given + * buffer. + * @return the number of characters read from the buffer, or npos if a conversion failed. + * @see c4::format(). c4::unformat() is the inverse function to format(). */ +template +size_t unformat(csubstr buf, csubstr fmt, Arg & C4_RESTRICT a, Args & C4_RESTRICT ...more) +{ + const size_t pos = fmt.find("{}"); + if(C4_UNLIKELY(pos == csubstr::npos)) + return unformat(buf, fmt); + size_t num = pos; + size_t out = num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = from_chars_first(buf, &a); + if(C4_UNLIKELY(num == csubstr::npos)) + return csubstr::npos; + out += num; + buf = buf.len >= num ? 
buf.sub(num) : substr{}; + num = unformat(buf, fmt.sub(pos + 2), more...); + if(C4_UNLIKELY(num == csubstr::npos)) + return csubstr::npos; + out += num; + return out; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a tag type for marking append to container + * @see c4::catrs() */ +struct append_t {}; + +/** a tag variable + * @see c4::catrs() */ +constexpr const append_t append = {}; + + +//----------------------------------------------------------------------------- + +/** like c4::cat(), but receives a container, and resizes it as needed to contain + * the result. The container is overwritten. To append to it, use the append + * overload. + * @see c4::cat() */ +template +inline void catrs(CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args) +{ +retry: + substr buf = to_substr(*cont); + size_t ret = cat(buf, args...); + cont->resize(ret); + if(ret > buf.len) + goto retry; +} + +/** like c4::cat(), but creates and returns a new container sized as needed to contain + * the result. + * @see c4::cat() */ +template +inline CharOwningContainer catrs(Args const& C4_RESTRICT ...args) +{ + CharOwningContainer cont; + catrs(&cont, args...); + return cont; +} + +/** like c4::cat(), but receives a container, and appends to it instead of + * overwriting it. The container is resized as needed to contain the result. + * @return the region newly appended to the original container + * @see c4::cat() + * @see c4::catrs() */ +template +inline csubstr catrs(append_t, CharOwningContainer * C4_RESTRICT cont, Args const& C4_RESTRICT ...args) +{ + const size_t pos = cont->size(); +retry: + substr buf = to_substr(*cont).sub(pos); + size_t ret = cat(buf, args...); + cont->resize(pos + ret); + if(ret > buf.len) + goto retry; + return to_csubstr(*cont).range(pos, cont->size()); +} + + +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the recursion +template +inline void catseprs(CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT) +{ + return; +} +/// @end cond + + +/** like c4::catsep(), but receives a container, and resizes it as needed to contain the result. + * The container is overwritten. To append to the container use the append overload. + * @see c4::catsep() */ +template +inline void catseprs(CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) +{ +retry: + substr buf = to_substr(*cont); + size_t ret = catsep(buf, sep, args...); + cont->resize(ret); + if(ret > buf.len) + goto retry; +} + +/** like c4::catsep(), but create a new container with the result. + * @return the requested container */ +template +inline CharOwningContainer catseprs(Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) +{ + CharOwningContainer cont; + catseprs(&cont, sep, args...); + return cont; +} + + +/// @cond dev +// terminates the recursion +template +inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT, Sep const& C4_RESTRICT) +{ + csubstr s; + return s; +} +/// @endcond + +/** like catsep(), but receives a container, and appends the arguments, resizing the + * container as needed to contain the result. The buffer is appended to. 
+ * @return a csubstr of the appended part + * @ingroup formatting_functions */ +template +inline csubstr catseprs(append_t, CharOwningContainer * C4_RESTRICT cont, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...args) +{ + const size_t pos = cont->size(); +retry: + substr buf = to_substr(*cont).sub(pos); + size_t ret = catsep(buf, sep, args...); + cont->resize(pos + ret); + if(ret > buf.len) + goto retry; + return to_csubstr(*cont).range(pos, cont->size()); +} + + +//----------------------------------------------------------------------------- + +/** like c4::format(), but receives a container, and resizes it as needed + * to contain the result. The container is overwritten. To append to + * the container use the append overload. + * @see c4::format() */ +template +inline void formatrs(CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args) +{ +retry: + substr buf = to_substr(*cont); + size_t ret = format(buf, fmt, args...); + cont->resize(ret); + if(ret > buf.len) + goto retry; +} + +/** like c4::format(), but create a new container with the result. + * @return the requested container */ +template +inline CharOwningContainer formatrs(csubstr fmt, Args const& C4_RESTRICT ...args) +{ + CharOwningContainer cont; + formatrs(&cont, fmt, args...); + return cont; +} + +/** like format(), but receives a container, and appends the + * arguments, resizing the container as needed to contain the + * result. The buffer is appended to. + * @return the region newly appended to the original container + * @ingroup formatting_functions */ +template +inline csubstr formatrs(append_t, CharOwningContainer * C4_RESTRICT cont, csubstr fmt, Args const& C4_RESTRICT ...args) +{ + const size_t pos = cont->size(); +retry: + substr buf = to_substr(*cont).sub(pos); + size_t ret = format(buf, fmt, args...); + cont->resize(pos + ret); + if(ret > buf.len) + goto retry; + return to_csubstr(*cont).range(pos, cont->size()); +} + +} // namespace c4 + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_FORMAT_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/format.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/dump.hpp +// https://github.com/biojppm/c4core/src/c4/dump.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_DUMP_HPP_ +#define C4_DUMP_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + + +namespace c4 { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** type of the function to dump characters */ +using DumperPfn = void (*)(csubstr buf); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
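+// Illustrative sketch (editor's addition, not part of the upstream c4core header):
+// a dumper matching DumperPfn simply forwards each chunk to some sink, e.g.
+//
+//     void stdout_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }  // hypothetical helper
+//
+// It can then be supplied either as a template argument, cat_dump<&stdout_dumper>(buf, a, b, c),
+// or as a function/functor argument, cat_dump(stdout_dumper, buf, a, b, c). The substr buf is
+// only scratch space for arguments that need serialization; string-like arguments are passed
+// straight to the dumper, as the overloads below show.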
+//----------------------------------------------------------------------------- + +template +inline size_t dump(substr buf, Arg const& a) +{ + size_t sz = to_chars(buf, a); // need to serialize to the buffer + if(C4_LIKELY(sz <= buf.len)) + dumpfn(buf.first(sz)); + return sz; +} + +template +inline size_t dump(DumperFn &&dumpfn, substr buf, Arg const& a) +{ + size_t sz = to_chars(buf, a); // need to serialize to the buffer + if(C4_LIKELY(sz <= buf.len)) + dumpfn(buf.first(sz)); + return sz; +} + +template +inline size_t dump(substr buf, csubstr a) +{ + if(buf.len) + dumpfn(a); // dump directly, no need to serialize to the buffer + return 0; // no space was used in the buffer +} + +template +inline size_t dump(DumperFn &&dumpfn, substr buf, csubstr a) +{ + if(buf.len) + dumpfn(a); // dump directly, no need to serialize to the buffer + return 0; // no space was used in the buffer +} + +template +inline size_t dump(substr buf, const char (&a)[N]) +{ + if(buf.len) + dumpfn(csubstr(a)); // dump directly, no need to serialize to the buffer + return 0; // no space was used in the buffer +} + +template +inline size_t dump(DumperFn &&dumpfn, substr buf, const char (&a)[N]) +{ + if(buf.len) + dumpfn(csubstr(a)); // dump directly, no need to serialize to the buffer + return 0; // no space was used in the buffer +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** */ +struct DumpResults +{ + enum : size_t { noarg = (size_t)-1 }; + size_t bufsize = 0; + size_t lastok = noarg; + bool success_until(size_t expected) const { return lastok == noarg ? false : lastok >= expected; } + bool write_arg(size_t arg) const { return lastok == noarg || arg > lastok; } + size_t argfail() const { return lastok + 1; } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +// terminates the variadic recursion +template +size_t cat_dump(DumperFn &&, substr) +{ + return 0; +} + +// terminates the variadic recursion +template +size_t cat_dump(substr) +{ + return 0; +} +/// @endcond + +/** take the function pointer as a function argument */ +template +size_t cat_dump(DumperFn &&dumpfn, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t size_for_a = dump(dumpfn, buf, a); + if(C4_UNLIKELY(size_for_a > buf.len)) + buf = buf.first(0); // ensure no more calls + size_t size_for_more = cat_dump(dumpfn, buf, more...); + return size_for_more > size_for_a ? size_for_more : size_for_a; +} + +/** take the function pointer as a template argument */ +template +size_t cat_dump(substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t size_for_a = dump(buf, a); + if(C4_LIKELY(size_for_a > buf.len)) + buf = buf.first(0); // ensure no more calls + size_t size_for_more = cat_dump(buf, more...); + return size_for_more > size_for_a ? 
size_for_more : size_for_a; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +namespace detail { + +// terminates the variadic recursion +template +DumpResults cat_dump_resume(size_t currarg, DumpResults results, substr buf, Arg const& C4_RESTRICT a) +{ + if(C4_LIKELY(results.write_arg(currarg))) + { + size_t sz = dump(buf, a); // yield to the specialized function + if(currarg == results.lastok + 1 && sz <= buf.len) + results.lastok = currarg; + results.bufsize = sz > results.bufsize ? sz : results.bufsize; + } + return results; +} + +// terminates the variadic recursion +template +DumpResults cat_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults results, substr buf, Arg const& C4_RESTRICT a) +{ + if(C4_LIKELY(results.write_arg(currarg))) + { + size_t sz = dump(dumpfn, buf, a); // yield to the specialized function + if(currarg == results.lastok + 1 && sz <= buf.len) + results.lastok = currarg; + results.bufsize = sz > results.bufsize ? sz : results.bufsize; + } + return results; +} + +template +DumpResults cat_dump_resume(size_t currarg, DumpResults results, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + results = detail::cat_dump_resume(currarg, results, buf, a); + return detail::cat_dump_resume(currarg + 1u, results, buf, more...); +} + +template +DumpResults cat_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults results, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + results = detail::cat_dump_resume(currarg, dumpfn, results, buf, a); + return detail::cat_dump_resume(currarg + 1u, dumpfn, results, buf, more...); +} +} // namespace detail +/// @endcond + + +template +C4_ALWAYS_INLINE DumpResults cat_dump_resume(DumpResults results, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + if(results.bufsize > buf.len) + return results; + return detail::cat_dump_resume(0u, results, buf, a, more...); +} + +template +C4_ALWAYS_INLINE DumpResults cat_dump_resume(DumperFn &&dumpfn, DumpResults results, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + if(results.bufsize > buf.len) + return results; + return detail::cat_dump_resume(0u, dumpfn, results, buf, a, more...); +} + +template +C4_ALWAYS_INLINE DumpResults cat_dump_resume(substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + return detail::cat_dump_resume(0u, DumpResults{}, buf, a, more...); +} + +template +C4_ALWAYS_INLINE DumpResults cat_dump_resume(DumperFn &&dumpfn, substr buf, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + return detail::cat_dump_resume(0u, dumpfn, DumpResults{}, buf, a, more...); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +// terminate the recursion +template +size_t catsep_dump(DumperFn &&, substr, Sep const& C4_RESTRICT) +{ + return 0; +} + +// terminate the recursion +template +size_t catsep_dump(substr, Sep const& C4_RESTRICT) +{ + return 0; +} +/// @endcond + +/** take the function pointer as a function argument */ +template +size_t catsep_dump(DumperFn &&dumpfn, substr buf, Sep const& C4_RESTRICT sep, Arg const& 
C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t sz = dump(dumpfn, buf, a); + if(C4_UNLIKELY(sz > buf.len)) + buf = buf.first(0); // ensure no more calls + if C4_IF_CONSTEXPR (sizeof...(more) > 0) + { + size_t szsep = dump(dumpfn, buf, sep); + if(C4_UNLIKELY(szsep > buf.len)) + buf = buf.first(0); // ensure no more calls + sz = sz > szsep ? sz : szsep; + } + size_t size_for_more = catsep_dump(dumpfn, buf, sep, more...); + return size_for_more > sz ? size_for_more : sz; +} + +/** take the function pointer as a template argument */ +template +size_t catsep_dump(substr buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + size_t sz = dump(buf, a); + if(C4_UNLIKELY(sz > buf.len)) + buf = buf.first(0); // ensure no more calls + if C4_IF_CONSTEXPR (sizeof...(more) > 0) + { + size_t szsep = dump(buf, sep); + if(C4_UNLIKELY(szsep > buf.len)) + buf = buf.first(0); // ensure no more calls + sz = sz > szsep ? sz : szsep; + } + size_t size_for_more = catsep_dump(buf, sep, more...); + return size_for_more > sz ? size_for_more : sz; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +namespace detail { +template +void catsep_dump_resume_(size_t currarg, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Arg const& C4_RESTRICT a) +{ + if(C4_LIKELY(results->write_arg(currarg))) + { + size_t sz = dump(*buf, a); + results->bufsize = sz > results->bufsize ? sz : results->bufsize; + if(C4_LIKELY(sz <= buf->len)) + results->lastok = currarg; + else + buf->len = 0; + } +} + +template +void catsep_dump_resume_(size_t currarg, DumperFn &&dumpfn, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Arg const& C4_RESTRICT a) +{ + if(C4_LIKELY(results->write_arg(currarg))) + { + size_t sz = dump(dumpfn, *buf, a); + results->bufsize = sz > results->bufsize ? 
sz : results->bufsize; + if(C4_LIKELY(sz <= buf->len)) + results->lastok = currarg; + else + buf->len = 0; + } +} + +template +C4_ALWAYS_INLINE void catsep_dump_resume(size_t currarg, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Sep const& C4_RESTRICT, Arg const& C4_RESTRICT a) +{ + detail::catsep_dump_resume_(currarg, results, buf, a); +} + +template +C4_ALWAYS_INLINE void catsep_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Sep const& C4_RESTRICT, Arg const& C4_RESTRICT a) +{ + detail::catsep_dump_resume_(currarg, dumpfn, results, buf, a); +} + +template +C4_ALWAYS_INLINE void catsep_dump_resume(size_t currarg, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + detail::catsep_dump_resume_(currarg , results, buf, a); + detail::catsep_dump_resume_(currarg + 1u, results, buf, sep); + detail::catsep_dump_resume (currarg + 2u, results, buf, sep, more...); +} + +template +C4_ALWAYS_INLINE void catsep_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults *C4_RESTRICT results, substr *C4_RESTRICT buf, Sep const& C4_RESTRICT sep, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + detail::catsep_dump_resume_(currarg , dumpfn, results, buf, a); + detail::catsep_dump_resume_(currarg + 1u, dumpfn, results, buf, sep); + detail::catsep_dump_resume (currarg + 2u, dumpfn, results, buf, sep, more...); +} +} // namespace detail +/// @endcond + + +template +C4_ALWAYS_INLINE DumpResults catsep_dump_resume(DumpResults results, substr buf, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...more) +{ + detail::catsep_dump_resume(0u, &results, &buf, sep, more...); + return results; +} + +template +C4_ALWAYS_INLINE DumpResults catsep_dump_resume(DumperFn &&dumpfn, DumpResults results, substr buf, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...more) +{ + detail::catsep_dump_resume(0u, dumpfn, &results, &buf, sep, more...); + return results; +} + +template +C4_ALWAYS_INLINE DumpResults catsep_dump_resume(substr buf, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...more) +{ + DumpResults results; + detail::catsep_dump_resume(0u, &results, &buf, sep, more...); + return results; +} + +template +C4_ALWAYS_INLINE DumpResults catsep_dump_resume(DumperFn &&dumpfn, substr buf, Sep const& C4_RESTRICT sep, Args const& C4_RESTRICT ...more) +{ + DumpResults results; + detail::catsep_dump_resume(0u, dumpfn, &results, &buf, sep, more...); + return results; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** take the function pointer as a function argument */ +template +C4_ALWAYS_INLINE size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(buf.len > 0 && fmt.len)) + dumpfn(fmt); + return 0u; +} + +/** take the function pointer as a function argument */ +template +C4_ALWAYS_INLINE size_t format_dump(substr buf, csubstr fmt) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(buf.len > 0 && fmt.len > 0)) + dumpfn(fmt); + return 0u; +} + +/** take the function pointer as a function argument */ +template +size_t format_dump(DumperFn &&dumpfn, substr buf, csubstr fmt, Arg const& 
C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + size_t pos = fmt.find("{}"); // @todo use _find_fmt() + if(C4_UNLIKELY(pos == csubstr::npos)) + { + if(C4_LIKELY(buf.len > 0 && fmt.len > 0)) + dumpfn(fmt); + return 0u; + } + if(C4_LIKELY(buf.len > 0 && pos > 0)) + dumpfn(fmt.first(pos)); // we can dump without using buf + fmt = fmt.sub(pos + 2); // skip {} do this before assigning to pos again + pos = dump(dumpfn, buf, a); + if(C4_UNLIKELY(pos > buf.len)) + buf.len = 0; // ensure no more calls to dump + size_t size_for_more = format_dump(dumpfn, buf, fmt, more...); + return size_for_more > pos ? size_for_more : pos; +} + +/** take the function pointer as a template argument */ +template +size_t format_dump(substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + size_t pos = fmt.find("{}"); // @todo use _find_fmt() + if(C4_UNLIKELY(pos == csubstr::npos)) + { + if(C4_LIKELY(buf.len > 0 && fmt.len > 0)) + dumpfn(fmt); + return 0u; + } + if(C4_LIKELY(buf.len > 0 && pos > 0)) + dumpfn(fmt.first(pos)); // we can dump without using buf + fmt = fmt.sub(pos + 2); // skip {} do this before assigning to pos again + pos = dump(buf, a); + if(C4_UNLIKELY(pos > buf.len)) + buf.len = 0; // ensure no more calls to dump + size_t size_for_more = format_dump(buf, fmt, more...); + return size_for_more > pos ? size_for_more : pos; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/// @cond dev +namespace detail { + +template +DumpResults format_dump_resume(size_t currarg, DumpResults results, substr buf, csubstr fmt) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(buf.len > 0)) + { + dumpfn(fmt); + results.lastok = currarg; + } + return results; +} + +template +DumpResults format_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults results, substr buf, csubstr fmt) +{ + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(buf.len > 0)) + { + dumpfn(fmt); + results.lastok = currarg; + } + return results; +} + +template +DumpResults format_dump_resume(size_t currarg, DumpResults results, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + // we need to process the format even if we're not + // going to print the first arguments because we're resuming + size_t pos = fmt.find("{}"); // @todo use _find_fmt() + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(results.write_arg(currarg))) + { + if(C4_UNLIKELY(pos == csubstr::npos)) + { + if(C4_LIKELY(buf.len > 0)) + { + results.lastok = currarg; + dumpfn(fmt); + } + return results; + } + if(C4_LIKELY(buf.len > 0)) + { + results.lastok = currarg; + dumpfn(fmt.first(pos)); + } + } + fmt = fmt.sub(pos + 2); + if(C4_LIKELY(results.write_arg(currarg + 1))) + { + pos = dump(buf, a); + results.bufsize = pos > results.bufsize ? 
pos : results.bufsize; + if(C4_LIKELY(pos <= buf.len)) + results.lastok = currarg + 1; + else + buf.len = 0; + } + return detail::format_dump_resume(currarg + 2u, results, buf, fmt, more...); +} +/// @endcond + + +template +DumpResults format_dump_resume(size_t currarg, DumperFn &&dumpfn, DumpResults results, substr buf, csubstr fmt, Arg const& C4_RESTRICT a, Args const& C4_RESTRICT ...more) +{ + // we need to process the format even if we're not + // going to print the first arguments because we're resuming + size_t pos = fmt.find("{}"); // @todo use _find_fmt() + // we can dump without using buf + // but we'll only dump if the buffer is ok + if(C4_LIKELY(results.write_arg(currarg))) + { + if(C4_UNLIKELY(pos == csubstr::npos)) + { + if(C4_LIKELY(buf.len > 0)) + { + results.lastok = currarg; + dumpfn(fmt); + } + return results; + } + if(C4_LIKELY(buf.len > 0)) + { + results.lastok = currarg; + dumpfn(fmt.first(pos)); + } + } + fmt = fmt.sub(pos + 2); + if(C4_LIKELY(results.write_arg(currarg + 1))) + { + pos = dump(dumpfn, buf, a); + results.bufsize = pos > results.bufsize ? pos : results.bufsize; + if(C4_LIKELY(pos <= buf.len)) + results.lastok = currarg + 1; + else + buf.len = 0; + } + return detail::format_dump_resume(currarg + 2u, dumpfn, results, buf, fmt, more...); +} +} // namespace detail + + +template +C4_ALWAYS_INLINE DumpResults format_dump_resume(DumpResults results, substr buf, csubstr fmt, Args const& C4_RESTRICT ...more) +{ + return detail::format_dump_resume(0u, results, buf, fmt, more...); +} + +template +C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, DumpResults results, substr buf, csubstr fmt, Args const& C4_RESTRICT ...more) +{ + return detail::format_dump_resume(0u, dumpfn, results, buf, fmt, more...); +} + + +template +C4_ALWAYS_INLINE DumpResults format_dump_resume(substr buf, csubstr fmt, Args const& C4_RESTRICT ...more) +{ + return detail::format_dump_resume(0u, DumpResults{}, buf, fmt, more...); +} + +template +C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, csubstr fmt, Args const& C4_RESTRICT ...more) +{ + return detail::format_dump_resume(0u, dumpfn, DumpResults{}, buf, fmt, more...); +} + + +} // namespace c4 + + +#endif /* C4_DUMP_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/dump.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/enum.hpp +// https://github.com/biojppm/c4core/src/c4/enum.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_ENUM_HPP_ +#define _C4_ENUM_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +//included above: +//#include + +/** @file enum.hpp utilities for enums: convert to/from string + */ + + +namespace c4 { + +//! 
taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +template +using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +typedef enum { + EOFFS_NONE = 0, ///< no offset + EOFFS_CLS = 1, ///< get the enum offset for the class name. @see eoffs_cls() + EOFFS_PFX = 2, ///< get the enum offset for the enum prefix. @see eoffs_pfx() + _EOFFS_LAST ///< reserved +} EnumOffsetType; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A simple (proxy) container for the value-name pairs of an enum type. + * Uses linear search for finds; this could be improved for time-critical + * code. */ +template +class EnumSymbols +{ +public: + + struct Sym + { + Enum value; + const char *name; + + bool cmp(const char *s) const; + bool cmp(const char *s, size_t len) const; + + const char *name_offs(EnumOffsetType t) const; + }; + + using const_iterator = Sym const*; + +public: + + template + EnumSymbols(Sym const (&p)[N]) : m_symbols(p), m_num(N) {} + + size_t size() const { return m_num; } + bool empty() const { return m_num == 0; } + + Sym const* get(Enum v) const { auto p = find(v); C4_CHECK_MSG(p != nullptr, "could not find symbol=%zd", (std::ptrdiff_t)v); return p; } + Sym const* get(const char *s) const { auto p = find(s); C4_CHECK_MSG(p != nullptr, "could not find symbol \"%s\"", s); return p; } + Sym const* get(const char *s, size_t len) const { auto p = find(s, len); C4_CHECK_MSG(p != nullptr, "could not find symbol \"%.*s\"", len, s); return p; } + + Sym const* find(Enum v) const; + Sym const* find(const char *s) const; + Sym const* find(const char *s, size_t len) const; + + Sym const& operator[] (size_t i) const { C4_CHECK(i < m_num); return m_symbols[i]; } + + Sym const* begin() const { return m_symbols; } + Sym const* end () const { return m_symbols + m_num; } + +private: + + Sym const* m_symbols; + size_t const m_num; + +}; + +//----------------------------------------------------------------------------- +/** return an EnumSymbols object for the enum type T + * + * @warning SPECIALIZE! This needs to be specialized for each enum + * type. Failure to provide a specialization will cause a linker + * error. */ +template +EnumSymbols const esyms(); + + +/** return the offset for an enum symbol class. For example, + * eoffs_cls() would be 13=strlen("MyEnumClass::"). + * + * With this function you can announce that the full prefix (including + * an eventual enclosing class or C++11 enum class) is of a certain + * length. + * + * @warning Needs to be specialized for each enum class type that + * wants to use this. When no specialization is given, will return + * 0. */ +template +size_t eoffs_cls() +{ + return 0; +} + + +/** return the offset for an enum symbol prefix. This includes + * eoffs_cls(). With this function you can announce that the full + * prefix (including an eventual enclosing class or C++11 enum class + * plus the string prefix) is of a certain length. + * + * @warning Needs to be specialized for each enum class type that + * wants to use this. 
When no specialization is given, will return + * 0. */ +template +size_t eoffs_pfx() +{ + return 0; +} + + +template +size_t eoffs(EnumOffsetType which) +{ + switch(which) + { + case EOFFS_NONE: + return 0; + case EOFFS_CLS: + return eoffs_cls(); + case EOFFS_PFX: + { + size_t pfx = eoffs_pfx(); + return pfx > 0 ? pfx : eoffs_cls(); + } + default: + C4_ERROR("unknown offset type %d", (int)which); + return 0; + } +} + + +//----------------------------------------------------------------------------- +/** get the enum value corresponding to a c-string */ + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +template +Enum str2e(const char* str) +{ + auto pairs = esyms(); + auto *p = pairs.get(str); + C4_CHECK_MSG(p != nullptr, "no valid enum pair name for '%s'", str); + return p->value; +} + +/** get the c-string corresponding to an enum value */ +template +const char* e2str(Enum e) +{ + auto es = esyms(); + auto *p = es.get(e); + C4_CHECK_MSG(p != nullptr, "no valid enum pair name"); + return p->name; +} + +/** like e2str(), but add an offset. */ +template +const char* e2stroffs(Enum e, EnumOffsetType ot=EOFFS_PFX) +{ + const char *s = e2str(e) + eoffs(ot); + return s; +} + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +//----------------------------------------------------------------------------- +/** Find a symbol by value. Returns nullptr when none is found */ +template +typename EnumSymbols::Sym const* EnumSymbols::find(Enum v) const +{ + for(Sym const* p = this->m_symbols, *e = p+this->m_num; p < e; ++p) + if(p->value == v) + return p; + return nullptr; +} + +/** Find a symbol by name. Returns nullptr when none is found */ +template +typename EnumSymbols::Sym const* EnumSymbols::find(const char *s) const +{ + for(Sym const* p = this->m_symbols, *e = p+this->m_num; p < e; ++p) + if(p->cmp(s)) + return p; + return nullptr; +} + +/** Find a symbol by name. Returns nullptr when none is found */ +template +typename EnumSymbols::Sym const* EnumSymbols::find(const char *s, size_t len) const +{ + for(Sym const* p = this->m_symbols, *e = p+this->m_num; p < e; ++p) + if(p->cmp(s, len)) + return p; + return nullptr; +} + +//----------------------------------------------------------------------------- +template +bool EnumSymbols::Sym::cmp(const char *s) const +{ + if(strcmp(name, s) == 0) + return true; + + for(int i = 1; i < _EOFFS_LAST; ++i) + { + auto o = eoffs((EnumOffsetType)i); + if(o > 0) + if(strcmp(name + o, s) == 0) + return true; + } + + return false; +} + +template +bool EnumSymbols::Sym::cmp(const char *s, size_t len) const +{ + if(strncmp(name, s, len) == 0) + return true; + + size_t nlen = 0; + for(int i = 1; i <_EOFFS_LAST; ++i) + { + auto o = eoffs((EnumOffsetType)i); + if(o > 0) + { + if(!nlen) + { + nlen = strlen(name); + } + C4_ASSERT(o < nlen); + size_t rem = nlen - o; + auto m = len > rem ? 
len : rem; + if(len >= m && strncmp(name + o, s, m) == 0) + return true; + } + } + + return false; +} + +//----------------------------------------------------------------------------- +template +const char* EnumSymbols::Sym::name_offs(EnumOffsetType t) const +{ + C4_ASSERT(eoffs(t) < strlen(name)); + return name + eoffs(t); +} + +} // namespace c4 + +#endif // _C4_ENUM_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/enum.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/bitmask.hpp +// https://github.com/biojppm/c4core/src/c4/bitmask.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_BITMASK_HPP_ +#define _C4_BITMASK_HPP_ + +/** @file bitmask.hpp bitmask utilities */ + +//included above: +//#include +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/enum.hpp +//#include "c4/enum.hpp" +#if !defined(C4_ENUM_HPP_) && !defined(_C4_ENUM_HPP_) +#error "amalgamate: file c4/enum.hpp must have been included at this point" +#endif /* C4_ENUM_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/format.hpp +//#include "c4/format.hpp" +#if !defined(C4_FORMAT_HPP_) && !defined(_C4_FORMAT_HPP_) +#error "amalgamate: file c4/format.hpp must have been included at this point" +#endif /* C4_FORMAT_HPP_ */ + + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable : 4996) // 'strncpy', fopen, etc: This function or variable may be unsafe +#elif defined(__clang__) +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 8 +# pragma GCC diagnostic ignored "-Wstringop-truncation" +# pragma GCC diagnostic ignored "-Wstringop-overflow" +# endif +#endif + +namespace c4 { + +//----------------------------------------------------------------------------- +/** write a bitmask to a stream, formatted as a string */ + +template +Stream& bm2stream(Stream &s, typename std::underlying_type::type bits, EnumOffsetType offst=EOFFS_PFX) +{ + using I = typename std::underlying_type::type; + bool written = false; + + auto const& pairs = esyms(); + + // write non null value + if(bits) + { + // do reverse iteration to give preference to composite enum symbols, + // which are likely to appear at the end of the enum sequence + for(size_t i = pairs.size() - 1; i != size_t(-1); --i) + { + auto p = pairs[i]; + I b(static_cast(p.value)); + if(b && (bits & b) == b) + { + if(written) s << '|'; // append bit-or character + written = true; + s << p.name_offs(offst); // append bit string + bits &= ~b; + } + } + return s; + } + else + { + // write a null value + for(size_t i = pairs.size() - 1; i != size_t(-1); --i) + { + auto p = pairs[i]; + I b(static_cast(p.value)); + if(b == 0) + { + s << p.name_offs(offst); + written = true; + break; + } + } + } + if(!written) + { + s << '0'; + } + return s; +} + +template +typename std::enable_if::value, Stream&>::type +bm2stream(Stream &s, Enum value, EnumOffsetType offst=EOFFS_PFX) +{ + using I = typename std::underlying_type::type; + return bm2stream(s, static_cast(value), offst); +} + + +//----------------------------------------------------------------------------- + +// some utility macros, undefed below + +/// @cond dev + +/* Execute `code` if the `num` of characters is available in the str + 
* buffer. This macro simplifies the code for bm2str(). + * @todo improve performance by writing from the end and moving only once. */ +#define _c4prependchars(code, num) \ + if(str && (pos + num <= sz)) \ + { \ + /* move the current string to the right */ \ + memmove(str + num, str, pos); \ + /* now write in the beginning of the string */ \ + code; \ + } \ + else if(str && sz) \ + { \ + C4_ERROR("cannot write to string pos=%d num=%d sz=%d", \ + (int)pos, (int)num, (int)sz); \ + } \ + pos += num + +/* Execute `code` if the `num` of characters is available in the str + * buffer. This macro simplifies the code for bm2str(). */ +#define _c4appendchars(code, num) \ + if(str && (pos + num <= sz)) \ + { \ + code; \ + } \ + else if(str && sz) \ + { \ + C4_ERROR("cannot write to string pos=%d num=%d sz=%d", \ + (int)pos, (int)num, (int)sz); \ + } \ + pos += num + +/// @endcond + + +/** convert a bitmask to string. + * return the number of characters written. To find the needed size, + * call first with str=nullptr and sz=0 */ +template +size_t bm2str +( + typename std::underlying_type::type bits, + char *str=nullptr, + size_t sz=0, + EnumOffsetType offst=EOFFS_PFX +) +{ + using I = typename std::underlying_type::type; + C4_ASSERT((str == nullptr) == (sz == 0)); + + auto syms = esyms(); + size_t pos = 0; + typename EnumSymbols::Sym const* C4_RESTRICT zero = nullptr; + + // do reverse iteration to give preference to composite enum symbols, + // which are likely to appear later in the enum sequence + for(size_t i = syms.size()-1; i != size_t(-1); --i) + { + auto const &C4_RESTRICT p = syms[i]; // do not copy, we are assigning to `zero` + I b = static_cast(p.value); + if(b == 0) + { + zero = &p; // save this symbol for later + } + else if((bits & b) == b) + { + bits &= ~b; + // append bit-or character + if(pos > 0) + { + _c4prependchars(*str = '|', 1); + } + // append bit string + const char *pname = p.name_offs(offst); + size_t len = strlen(pname); + _c4prependchars(strncpy(str, pname, len), len); + } + } + + C4_CHECK_MSG(bits == 0, "could not find all bits"); + if(pos == 0) // make sure at least something is written + { + if(zero) // if we have a zero symbol, use that + { + const char *pname = zero->name_offs(offst); + size_t len = strlen(pname); + _c4prependchars(strncpy(str, pname, len), len); + } + else // otherwise just write an integer zero + { + _c4prependchars(*str = '0', 1); + } + } + _c4appendchars(str[pos] = '\0', 1); + + return pos; +} + + +// cleanup! 
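+// Illustrative usage sketch (editor's addition, not from upstream): the two-call pattern
+// described in the bm2str() docs above, assuming a hypothetical enum MyFlags for which
+// esyms<MyFlags>() has been specialized:
+//
+//     size_t needed = bm2str<MyFlags>(flags);            // str=nullptr, sz=0: just measure
+//     std::vector<char> out(needed);                      // `needed` counts the trailing '\0'
+//     bm2str<MyFlags>(flags, out.data(), out.size());     // writes e.g. "FOO|BAR"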
+#undef _c4appendchars +#undef _c4prependchars + + +/** scoped enums do not convert automatically to their underlying type, + * so this SFINAE overload will accept scoped enum symbols and cast them + * to the underlying type */ +template +typename std::enable_if::value, size_t>::type +bm2str +( + Enum bits, + char *str=nullptr, + size_t sz=0, + EnumOffsetType offst=EOFFS_PFX +) +{ + using I = typename std::underlying_type::type; + return bm2str(static_cast(bits), str, sz, offst); +} + + +//----------------------------------------------------------------------------- + +namespace detail { + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +template +typename std::underlying_type::type str2bm_read_one(const char *str, size_t sz, bool alnum) +{ + using I = typename std::underlying_type::type; + auto pairs = esyms(); + if(alnum) + { + auto *p = pairs.find(str, sz); + C4_CHECK_MSG(p != nullptr, "no valid enum pair name for '%.*s'", (int)sz, str); + return static_cast(p->value); + } + I tmp; + size_t len = uncat(csubstr(str, sz), tmp); + C4_CHECK_MSG(len != csubstr::npos, "could not read string as an integral type: '%.*s'", (int)sz, str); + return tmp; +} + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif +} // namespace detail + +/** convert a string to a bitmask */ +template +typename std::underlying_type::type str2bm(const char *str, size_t sz) +{ + using I = typename std::underlying_type::type; + + I val = 0; + bool started = false; + bool alnum = false, num = false; + const char *f = nullptr, *pc = str; + for( ; pc < str+sz; ++pc) + { + const char c = *pc; + if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') + { + C4_CHECK(( ! num) || ((pc - f) == 1 && (c == 'x' || c == 'X'))); // accept hexadecimal numbers + if( ! started) + { + f = pc; + alnum = started = true; + } + } + else if(c >= '0' && c <= '9') + { + C4_CHECK( ! 
alnum); + if(!started) + { + f = pc; + num = started = true; + } + } + else if(c == ':' || c == ' ') + { + // skip this char + } + else if(c == '|' || c == '\0') + { + C4_ASSERT(num != alnum); + C4_ASSERT(pc >= f); + val |= detail::str2bm_read_one(f, static_cast(pc-f), alnum); + started = num = alnum = false; + if(c == '\0') + { + return val; + } + } + else + { + C4_ERROR("bad character '%c' in bitmask string", c); + } + } + + if(f) + { + C4_ASSERT(num != alnum); + C4_ASSERT(pc >= f); + val |= detail::str2bm_read_one(f, static_cast(pc-f), alnum); + } + + return val; +} + +/** convert a string to a bitmask */ +template +typename std::underlying_type::type str2bm(const char *str) +{ + return str2bm(str, strlen(str)); +} + +} // namespace c4 + +#ifdef _MSC_VER +# pragma warning(pop) +#elif defined(__clang__) +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif // _C4_BITMASK_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/bitmask.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/span.hpp +// https://github.com/biojppm/c4core/src/c4/span.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_SPAN_HPP_ +#define _C4_SPAN_HPP_ + +/** @file span.hpp Provides span classes. */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/config.hpp +//#include "c4/config.hpp" +#if !defined(C4_CONFIG_HPP_) && !defined(_C4_CONFIG_HPP_) +#error "amalgamate: file c4/config.hpp must have been included at this point" +#endif /* C4_CONFIG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/szconv.hpp +//#include "c4/szconv.hpp" +#if !defined(C4_SZCONV_HPP_) && !defined(_C4_SZCONV_HPP_) +#error "amalgamate: file c4/szconv.hpp must have been included at this point" +#endif /* C4_SZCONV_HPP_ */ + + +//included above: +//#include + +namespace c4 { + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** a crtp base for implementing span classes + * + * A span is a non-owning range of elements contiguously stored in memory. + * Unlike STL's array_view, the span allows write-access to its members. 
+ * + * To obtain subspans from a span, the following const member functions + * are available: + * - subspan(first, num) + * - range(first, last) + * - first(num) + * - last(num) + * + * A span can also be resized via the following non-const member functions: + * - resize(sz) + * - ltrim(num) + * - rtrim(num) + * + * @see span + * @see cspan + * @see spanrs + * @see cspanrs + * @see spanrsl + * @see cspanrsl + */ +template +class span_crtp +{ +// some utility defines, undefined at the end of this class +#define _c4this ((SpanImpl *)this) +#define _c4cthis ((SpanImpl const*)this) +#define _c4ptr ((SpanImpl *)this)->m_ptr +#define _c4cptr ((SpanImpl const*)this)->m_ptr +#define _c4sz ((SpanImpl *)this)->m_size +#define _c4csz ((SpanImpl const*)this)->m_size + +public: + + _c4_DEFINE_ARRAY_TYPES(T, I); + +public: + + C4_ALWAYS_INLINE constexpr I value_size() const noexcept { return sizeof(T); } + C4_ALWAYS_INLINE constexpr I elm_size () const noexcept { return sizeof(T); } + C4_ALWAYS_INLINE constexpr I type_size () const noexcept { return sizeof(T); } + C4_ALWAYS_INLINE I byte_size () const noexcept { return _c4csz*sizeof(T); } + + C4_ALWAYS_INLINE bool empty() const noexcept { return _c4csz == 0; } + C4_ALWAYS_INLINE I size() const noexcept { return _c4csz; } + //C4_ALWAYS_INLINE I capacity() const noexcept { return _c4sz; } // this must be defined by impl classes + + C4_ALWAYS_INLINE void clear() noexcept { _c4sz = 0; } + + C4_ALWAYS_INLINE T * data() noexcept { return _c4ptr; } + C4_ALWAYS_INLINE T const* data() const noexcept { return _c4cptr; } + + C4_ALWAYS_INLINE iterator begin() noexcept { return _c4ptr; } + C4_ALWAYS_INLINE const_iterator begin() const noexcept { return _c4cptr; } + C4_ALWAYS_INLINE const_iterator cbegin() const noexcept { return _c4cptr; } + + C4_ALWAYS_INLINE iterator end() noexcept { return _c4ptr + _c4sz; } + C4_ALWAYS_INLINE const_iterator end() const noexcept { return _c4cptr + _c4csz; } + C4_ALWAYS_INLINE const_iterator cend() const noexcept { return _c4cptr + _c4csz; } + + C4_ALWAYS_INLINE reverse_iterator rbegin() noexcept { return reverse_iterator(_c4ptr + _c4sz); } + C4_ALWAYS_INLINE const_reverse_iterator rbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); } + C4_ALWAYS_INLINE const_reverse_iterator crbegin() const noexcept { return reverse_iterator(_c4cptr + _c4sz); } + + C4_ALWAYS_INLINE reverse_iterator rend() noexcept { return const_reverse_iterator(_c4ptr); } + C4_ALWAYS_INLINE const_reverse_iterator rend() const noexcept { return const_reverse_iterator(_c4cptr); } + C4_ALWAYS_INLINE const_reverse_iterator crend() const noexcept { return const_reverse_iterator(_c4cptr); } + + C4_ALWAYS_INLINE T & front() C4_NOEXCEPT_X { C4_XASSERT(!empty()); return _c4ptr [0]; } + C4_ALWAYS_INLINE T const& front() const C4_NOEXCEPT_X { C4_XASSERT(!empty()); return _c4cptr[0]; } + + C4_ALWAYS_INLINE T & back() C4_NOEXCEPT_X { C4_XASSERT(!empty()); return _c4ptr [_c4sz - 1]; } + C4_ALWAYS_INLINE T const& back() const C4_NOEXCEPT_X { C4_XASSERT(!empty()); return _c4cptr[_c4csz - 1]; } + + C4_ALWAYS_INLINE T & operator[] (I i) C4_NOEXCEPT_X { C4_XASSERT(i >= 0 && i < _c4sz ); return _c4ptr [i]; } + C4_ALWAYS_INLINE T const& operator[] (I i) const C4_NOEXCEPT_X { C4_XASSERT(i >= 0 && i < _c4csz); return _c4cptr[i]; } + + C4_ALWAYS_INLINE SpanImpl subspan(I first, I num) const C4_NOEXCEPT_X + { + C4_XASSERT((first >= 0 && first < _c4csz) || (first == _c4csz && num == 0)); + C4_XASSERT((first + num >= 0) && (first + num <= _c4csz)); + return 
_c4cthis->_select(_c4cptr + first, num); + } + C4_ALWAYS_INLINE SpanImpl subspan(I first) const C4_NOEXCEPT_X ///< goes up until the end of the span + { + C4_XASSERT(first >= 0 && first <= _c4csz); + return _c4cthis->_select(_c4cptr + first, _c4csz - first); + } + + C4_ALWAYS_INLINE SpanImpl range(I first, I last) const C4_NOEXCEPT_X ///< last element is NOT included + { + C4_XASSERT(((first >= 0) && (first < _c4csz)) || (first == _c4csz && first == last)); + C4_XASSERT((last >= 0) && (last <= _c4csz)); + C4_XASSERT(last >= first); + return _c4cthis->_select(_c4cptr + first, last - first); + } + C4_ALWAYS_INLINE SpanImpl range(I first) const C4_NOEXCEPT_X ///< goes up until the end of the span + { + C4_XASSERT(((first >= 0) && (first <= _c4csz))); + return _c4cthis->_select(_c4cptr + first, _c4csz - first); + } + + C4_ALWAYS_INLINE SpanImpl first(I num) const C4_NOEXCEPT_X ///< get the first num elements, starting at 0 + { + C4_XASSERT((num >= 0) && (num <= _c4csz)); + return _c4cthis->_select(_c4cptr, num); + } + C4_ALWAYS_INLINE SpanImpl last(I num) const C4_NOEXCEPT_X ///< get the last num elements, starting at size()-num + { + C4_XASSERT((num >= 0) && (num <= _c4csz)); + return _c4cthis->_select(_c4cptr + _c4csz - num, num); + } + + bool is_subspan(span_crtp const& ss) const noexcept + { + if(_c4cptr == nullptr) return false; + auto *b = begin(), *e = end(); + auto *ssb = ss.begin(), *sse = ss.end(); + if(ssb >= b && sse <= e) + { + return true; + } + else + { + return false; + } + } + + /** COMPLement Left: return the complement to the left of the beginning of the given subspan. + * If ss does not begin inside this, returns an empty substring. */ + SpanImpl compll(span_crtp const& ss) const C4_NOEXCEPT_X + { + auto ssb = ss.begin(); + auto b = begin(); + auto e = end(); + if(ssb >= b && ssb <= e) + { + return subspan(0, static_cast(ssb - b)); + } + else + { + return subspan(0, 0); + } + } + + /** COMPLement Right: return the complement to the right of the end of the given subspan. + * If ss does not end inside this, returns an empty substring. */ + SpanImpl complr(span_crtp const& ss) const C4_NOEXCEPT_X + { + auto sse = ss.end(); + auto b = begin(); + auto e = end(); + if(sse >= b && sse <= e) + { + return subspan(static_cast(sse - b), static_cast(e - sse)); + } + else + { + return subspan(0, 0); + } + } + + C4_ALWAYS_INLINE bool same_span(span_crtp const& that) const noexcept + { + return size() == that.size() && data() == that.data(); + } + template + C4_ALWAYS_INLINE bool same_span(span_crtp const& that) const C4_NOEXCEPT_X + { + I tsz = szconv(that.size()); // x-asserts that the size does not overflow + return size() == tsz && data() == that.data(); + } + +#undef _c4this +#undef _c4cthis +#undef _c4ptr +#undef _c4cptr +#undef _c4sz +#undef _c4csz +}; + +//----------------------------------------------------------------------------- +template +inline constexpr bool operator== +( + span_crtp const& l, + span_crtp const& r +) +{ +#if C4_CPP >= 14 + return std::equal(l.begin(), l.end(), r.begin(), r.end()); +#else + return l.same_span(r) || std::equal(l.begin(), l.end(), r.begin()); +#endif +} + +template +inline constexpr bool operator!= +( + span_crtp const& l, + span_crtp const& r +) +{ + return ! 
(l == r); +} + +//----------------------------------------------------------------------------- +template +inline constexpr bool operator< +( + span_crtp const& l, + span_crtp const& r +) +{ + return std::lexicographical_compare(l.begin(), l.end(), r.begin(), r.end()); +} + +template +inline constexpr bool operator<= +( + span_crtp const& l, + span_crtp const& r +) +{ + return ! (l > r); +} + +//----------------------------------------------------------------------------- +template +inline constexpr bool operator> +( + span_crtp const& l, + span_crtp const& r +) +{ + return r < l; +} + +//----------------------------------------------------------------------------- +template +inline constexpr bool operator>= +( + span_crtp const& l, + span_crtp const& r +) +{ + return ! (l < r); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A non-owning span of elements contiguously stored in memory. */ +template +class span : public span_crtp> +{ + friend class span_crtp>; + + T * C4_RESTRICT m_ptr; + I m_size; + + C4_ALWAYS_INLINE span _select(T *p, I sz) const { return span(p, sz); } + +public: + + _c4_DEFINE_ARRAY_TYPES(T, I); + using NCT = typename std::remove_const::type; //!< NCT=non const type + using CT = typename std::add_const::type; //!< CT=const type + using const_type = span; + + /// convert automatically to span of const T + operator span () const { span s(m_ptr, m_size); return s; } + +public: + + C4_ALWAYS_INLINE C4_CONSTEXPR14 span() noexcept : m_ptr{nullptr}, m_size{0} {} + + span(span const&) = default; + span(span &&) = default; + + span& operator= (span const&) = default; + span& operator= (span &&) = default; + +public: + + /** @name Construction and assignment from same type */ + /** @{ */ + + template C4_ALWAYS_INLINE C4_CONSTEXPR14 span (T (&arr)[N]) noexcept : m_ptr{arr}, m_size{N} {} + template C4_ALWAYS_INLINE C4_CONSTEXPR14 void assign(T (&arr)[N]) noexcept { m_ptr = arr; m_size = N; } + + C4_ALWAYS_INLINE C4_CONSTEXPR14 span(T *p, I sz) noexcept : m_ptr{p}, m_size{sz} {} + C4_ALWAYS_INLINE C4_CONSTEXPR14 void assign(T *p, I sz) noexcept { m_ptr = p; m_size = sz; } + + C4_ALWAYS_INLINE C4_CONSTEXPR14 span (c4::aggregate_t, std::initializer_list il) noexcept : m_ptr{&*il.begin()}, m_size{il.size()} {} + C4_ALWAYS_INLINE C4_CONSTEXPR14 void assign(c4::aggregate_t, std::initializer_list il) noexcept { m_ptr = &*il.begin(); m_size = il.size(); } + + /** @} */ + +public: + + C4_ALWAYS_INLINE I capacity() const noexcept { return m_size; } + + C4_ALWAYS_INLINE void resize(I sz) C4_NOEXCEPT_A { C4_ASSERT(sz <= m_size); m_size = sz; } + C4_ALWAYS_INLINE void rtrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; } + C4_ALWAYS_INLINE void ltrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; m_ptr += n; } + +}; +template using cspan = span; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A non-owning span resizeable up to a capacity. Subselection or resizing + * will keep the original provided it starts at begin(). If subselection or + * resizing change the pointer, then the original capacity information will + * be lost. 
+ * + * Thus, resizing via resize() and ltrim() and subselecting via first() + * or any of subspan() or range() when starting from the beginning will keep + * the original capacity. OTOH, using last(), or any of subspan() or range() + * with an offset from the start will remove from capacity (shifting the + * pointer) by the corresponding offset. If this is undesired, then consider + * using spanrsl. + * + * @see spanrs for a span resizeable on the right + * @see spanrsl for a span resizeable on the right and left + */ + +template +class spanrs : public span_crtp> +{ + friend class span_crtp>; + + T * C4_RESTRICT m_ptr; + I m_size; + I m_capacity; + + C4_ALWAYS_INLINE spanrs _select(T *p, I sz) const noexcept + { + C4_ASSERT(p >= m_ptr); + size_t delta = static_cast(p - m_ptr); + C4_ASSERT(m_capacity >= delta); + return spanrs(p, sz, static_cast(m_capacity - delta)); + } + +public: + + _c4_DEFINE_ARRAY_TYPES(T, I); + using NCT = typename std::remove_const::type; //!< NCT=non const type + using CT = typename std::add_const::type; //!< CT=const type + using const_type = spanrs; + + /// convert automatically to span of T + C4_ALWAYS_INLINE operator span () const noexcept { return span(m_ptr, m_size); } + /// convert automatically to span of const T + //C4_ALWAYS_INLINE operator span () const noexcept { span s(m_ptr, m_size); return s; } + /// convert automatically to spanrs of const T + C4_ALWAYS_INLINE operator spanrs () const noexcept { spanrs s(m_ptr, m_size, m_capacity); return s; } + +public: + + C4_ALWAYS_INLINE spanrs() noexcept : m_ptr{nullptr}, m_size{0}, m_capacity{0} {} + + spanrs(spanrs const&) = default; + spanrs(spanrs &&) = default; + + spanrs& operator= (spanrs const&) = default; + spanrs& operator= (spanrs &&) = default; + +public: + + /** @name Construction and assignment from same type */ + /** @{ */ + + C4_ALWAYS_INLINE spanrs(T *p, I sz) noexcept : m_ptr{p}, m_size{sz}, m_capacity{sz} {} + /** @warning will reset the capacity to sz */ + C4_ALWAYS_INLINE void assign(T *p, I sz) noexcept { m_ptr = p; m_size = sz; m_capacity = sz; } + + C4_ALWAYS_INLINE spanrs(T *p, I sz, I cap) noexcept : m_ptr{p}, m_size{sz}, m_capacity{cap} {} + C4_ALWAYS_INLINE void assign(T *p, I sz, I cap) noexcept { m_ptr = p; m_size = sz; m_capacity = cap; } + + template C4_ALWAYS_INLINE spanrs(T (&arr)[N]) noexcept : m_ptr{arr}, m_size{N}, m_capacity{N} {} + template C4_ALWAYS_INLINE void assign(T (&arr)[N]) noexcept { m_ptr = arr; m_size = N; m_capacity = N; } + + C4_ALWAYS_INLINE spanrs(c4::aggregate_t, std::initializer_list il) noexcept : m_ptr{il.begin()}, m_size{il.size()}, m_capacity{il.size()} {} + C4_ALWAYS_INLINE void assign(c4::aggregate_t, std::initializer_list il) noexcept { m_ptr = il.begin(); m_size = il.size(); m_capacity = il.size(); } + + /** @} */ + +public: + + C4_ALWAYS_INLINE I capacity() const noexcept { return m_capacity; } + + C4_ALWAYS_INLINE void resize(I sz) C4_NOEXCEPT_A { C4_ASSERT(sz <= m_capacity); m_size = sz; } + C4_ALWAYS_INLINE void rtrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; } + C4_ALWAYS_INLINE void ltrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; m_ptr += n; m_capacity -= n; } + +}; +template using cspanrs = spanrs; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A non-owning span which always retains the 
capacity of the original + * range it was taken from (though it may loose its original size). + * The resizing methods resize(), ltrim(), rtrim() as well + * as the subselection methods subspan(), range(), first() and last() can be + * used at will without loosing the original capacity; the full capacity span + * can always be recovered by calling original(). + */ +template +class spanrsl : public span_crtp> +{ + friend class span_crtp>; + + T *C4_RESTRICT m_ptr; ///< the current ptr. the original ptr is (m_ptr - m_offset). + I m_size; ///< the current size. the original size is unrecoverable. + I m_capacity; ///< the current capacity. the original capacity is (m_capacity + m_offset). + I m_offset; ///< the offset of the current m_ptr to the start of the original memory block. + + C4_ALWAYS_INLINE spanrsl _select(T *p, I sz) const noexcept + { + C4_ASSERT(p >= m_ptr); + I delta = static_cast(p - m_ptr); + C4_ASSERT(m_capacity >= delta); + return spanrsl(p, sz, static_cast(m_capacity - delta), m_offset + delta); + } + +public: + + _c4_DEFINE_ARRAY_TYPES(T, I); + using NCT = typename std::remove_const::type; //!< NCT=non const type + using CT = typename std::add_const::type; //!< CT=const type + using const_type = spanrsl; + + C4_ALWAYS_INLINE operator span () const noexcept { return span(m_ptr, m_size); } + C4_ALWAYS_INLINE operator spanrs () const noexcept { return spanrs(m_ptr, m_size, m_capacity); } + C4_ALWAYS_INLINE operator spanrsl () const noexcept { return spanrsl(m_ptr, m_size, m_capacity, m_offset); } + +public: + + C4_ALWAYS_INLINE spanrsl() noexcept : m_ptr{nullptr}, m_size{0}, m_capacity{0}, m_offset{0} {} + + spanrsl(spanrsl const&) = default; + spanrsl(spanrsl &&) = default; + + spanrsl& operator= (spanrsl const&) = default; + spanrsl& operator= (spanrsl &&) = default; + +public: + + C4_ALWAYS_INLINE spanrsl(T *p, I sz) noexcept : m_ptr{p}, m_size{sz}, m_capacity{sz}, m_offset{0} {} + C4_ALWAYS_INLINE void assign(T *p, I sz) noexcept { m_ptr = p; m_size = sz; m_capacity = sz; m_offset = 0; } + + C4_ALWAYS_INLINE spanrsl(T *p, I sz, I cap) noexcept : m_ptr{p}, m_size{sz}, m_capacity{cap}, m_offset{0} {} + C4_ALWAYS_INLINE void assign(T *p, I sz, I cap) noexcept { m_ptr = p; m_size = sz; m_capacity = cap; m_offset = 0; } + + C4_ALWAYS_INLINE spanrsl(T *p, I sz, I cap, I offs) noexcept : m_ptr{p}, m_size{sz}, m_capacity{cap}, m_offset{offs} {} + C4_ALWAYS_INLINE void assign(T *p, I sz, I cap, I offs) noexcept { m_ptr = p; m_size = sz; m_capacity = cap; m_offset = offs; } + + template C4_ALWAYS_INLINE spanrsl(T (&arr)[N]) noexcept : m_ptr{arr}, m_size{N}, m_capacity{N}, m_offset{0} {} + template C4_ALWAYS_INLINE void assign(T (&arr)[N]) noexcept { m_ptr = arr; m_size = N; m_capacity = N; m_offset = 0; } + + C4_ALWAYS_INLINE spanrsl(c4::aggregate_t, std::initializer_list il) noexcept : m_ptr{il.begin()}, m_size{il.size()}, m_capacity{il.size()}, m_offset{0} {} + C4_ALWAYS_INLINE void assign (c4::aggregate_t, std::initializer_list il) noexcept { m_ptr = il.begin(); m_size = il.size(); m_capacity = il.size(); m_offset = 0; } + +public: + + C4_ALWAYS_INLINE I offset() const noexcept { return m_offset; } + C4_ALWAYS_INLINE I capacity() const noexcept { return m_capacity; } + + C4_ALWAYS_INLINE void resize(I sz) C4_NOEXCEPT_A { C4_ASSERT(sz <= m_capacity); m_size = sz; } + C4_ALWAYS_INLINE void rtrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; } + C4_ALWAYS_INLINE void ltrim (I n ) C4_NOEXCEPT_A { C4_ASSERT(n >= 0 && n < m_size); m_size -= n; m_ptr += n; 
m_offset += n; m_capacity -= n; } + + /** recover the original span as an spanrsl */ + C4_ALWAYS_INLINE spanrsl original() const + { + return spanrsl(m_ptr - m_offset, m_capacity + m_offset, m_capacity + m_offset, 0); + } + /** recover the original span as a different span type. Example: spanrs<...> orig = s.original(); */ + template class OtherSpanType> + C4_ALWAYS_INLINE OtherSpanType original() + { + return OtherSpanType(m_ptr - m_offset, m_capacity + m_offset); + } +}; +template using cspanrsl = spanrsl; + + +} // namespace c4 + + +#endif /* _C4_SPAN_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/span.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/type_name.hpp +// https://github.com/biojppm/c4core/src/c4/type_name.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_TYPENAME_HPP_ +#define _C4_TYPENAME_HPP_ + +/** @file type_name.hpp compile-time type name */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/span.hpp +//#include "c4/span.hpp" +#if !defined(C4_SPAN_HPP_) && !defined(_C4_SPAN_HPP_) +#error "amalgamate: file c4/span.hpp must have been included at this point" +#endif /* C4_SPAN_HPP_ */ + + +/// @cond dev +struct _c4t +{ + const char *str; + size_t sz; + template + constexpr _c4t(const char (&s)[N]) : str(s), sz(N-1) {} // take off the \0 +}; +// this is a more abbreviated way of getting the type name +// (if we used span in the return type, the name would involve +// templates and would create longer type name strings, +// as well as larger differences between compilers) +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE +_c4t _c4tn() +{ + auto p = _c4t(C4_PRETTY_FUNC); + return p; +} +/// @endcond + + +namespace c4 { + +/** compile-time type name + * @see http://stackoverflow.com/a/20170989/5875572 */ +template +C4_CONSTEXPR14 cspan type_name() +{ + const _c4t p = _c4tn(); + +#if (0) // _C4_THIS_IS_A_DEBUG_SCAFFOLD + for(size_t index = 0; index < p.sz; ++index) + { + printf(" %2c", p.str[index]); + } + printf("\n"); + for(size_t index = 0; index < p.sz; ++index) + { + printf(" %2d", (int)index); + } + printf("\n"); +#endif + +#if defined(_MSC_VER) +# if defined(__clang__) // Visual Studio has the clang toolset + // example: + // ..........................xxx. + // _c4t __cdecl _c4tn() [T = int] + enum : size_t { tstart = 26, tend = 1}; + +# elif defined(C4_MSVC_2015) || defined(C4_MSVC_2017) || defined(C4_MSVC_2019) || defined(C4_MSVC_2022) + // Note: subtract 7 at the end because the function terminates with ">(void)" in VS2015+ + cspan::size_type tstart = 26, tend = 7; + + const char *s = p.str + tstart; // look at the start + + // we're not using strcmp() or memcmp() to spare the #include + + // does it start with 'class '? + if(p.sz > 6 && s[0] == 'c' && s[1] == 'l' && s[2] == 'a' && s[3] == 's' && s[4] == 's' && s[5] == ' ') + { + tstart += 6; + } + // does it start with 'struct '? + else if(p.sz > 7 && s[0] == 's' && s[1] == 't' && s[2] == 'r' && s[3] == 'u' && s[4] == 'c' && s[5] == 't' && s[6] == ' ') + { + tstart += 7; + } + +# else + C4_NOT_IMPLEMENTED(); +# endif + +#elif defined(__ICC) + // example: + // ........................xxx. 
+ // "_c4t _c4tn() [with T = int]" + enum : size_t { tstart = 23, tend = 1}; + +#elif defined(__clang__) + // example: + // ...................xxx. + // "_c4t _c4tn() [T = int]" + enum : size_t { tstart = 18, tend = 1}; + +#elif defined(__GNUC__) + #if __GNUC__ >= 7 && C4_CPP >= 14 + // example: + // ..................................xxx. + // "constexpr _c4t _c4tn() [with T = int]" + enum : size_t { tstart = 33, tend = 1 }; + #else + // example: + // ........................xxx. + // "_c4t _c4tn() [with T = int]" + enum : size_t { tstart = 23, tend = 1 }; + #endif +#else + C4_NOT_IMPLEMENTED(); +#endif + + cspan o(p.str + tstart, p.sz - tstart - tend); + + return o; +} + +/** compile-time type name + * @overload */ +template +C4_CONSTEXPR14 C4_ALWAYS_INLINE cspan type_name(T const&) +{ + return type_name(); +} + +} // namespace c4 + +#endif //_C4_TYPENAME_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/type_name.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/base64.hpp +// https://github.com/biojppm/c4core/src/c4/base64.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_BASE64_HPP_ +#define _C4_BASE64_HPP_ + +/** @file base64.hpp encoding/decoding for base64. + * @see https://en.wikipedia.org/wiki/Base64 + * @see https://www.base64encode.org/ + * */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/charconv.hpp +//#include "c4/charconv.hpp" +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/blob.hpp +//#include "c4/blob.hpp" +#if !defined(C4_BLOB_HPP_) && !defined(_C4_BLOB_HPP_) +#error "amalgamate: file c4/blob.hpp must have been included at this point" +#endif /* C4_BLOB_HPP_ */ + + +namespace c4 { + +/** check that the given buffer is a valid base64 encoding + * @see https://en.wikipedia.org/wiki/Base64 */ +bool base64_valid(csubstr encoded); + +/** base64-encode binary data. + * @param encoded [out] output buffer for encoded data + * @param data [in] the input buffer with the binary data + * @return the number of bytes needed to return the output. No writes occur beyond the end of the output buffer. + * @see https://en.wikipedia.org/wiki/Base64 */ +size_t base64_encode(substr encoded, cblob data); + +/** decode the base64 encoding in the given buffer + * @param encoded [in] the encoded base64 + * @param data [out] the output buffer + * @return the number of bytes needed to return the output.. No writes occur beyond the end of the output buffer. 
+ * @see https://en.wikipedia.org/wiki/Base64 */ +size_t base64_decode(csubstr encoded, blob data); + + +namespace fmt { + +template +struct base64_wrapper_ +{ + blob_ data; + base64_wrapper_() : data() {} + base64_wrapper_(blob_ blob) : data(blob) {} +}; +using const_base64_wrapper = base64_wrapper_; +using base64_wrapper = base64_wrapper_; + + +/** mark a variable to be written in base64 format */ +template +C4_ALWAYS_INLINE const_base64_wrapper cbase64(Args const& C4_RESTRICT ...args) +{ + return const_base64_wrapper(cblob(args...)); +} +/** mark a csubstr to be written in base64 format */ +C4_ALWAYS_INLINE const_base64_wrapper cbase64(csubstr s) +{ + return const_base64_wrapper(cblob(s.str, s.len)); +} +/** mark a variable to be written in base64 format */ +template +C4_ALWAYS_INLINE const_base64_wrapper base64(Args const& C4_RESTRICT ...args) +{ + return const_base64_wrapper(cblob(args...)); +} +/** mark a csubstr to be written in base64 format */ +C4_ALWAYS_INLINE const_base64_wrapper base64(csubstr s) +{ + return const_base64_wrapper(cblob(s.str, s.len)); +} + +/** mark a variable to be read in base64 format */ +template +C4_ALWAYS_INLINE base64_wrapper base64(Args &... args) +{ + return base64_wrapper(blob(args...)); +} +/** mark a variable to be read in base64 format */ +C4_ALWAYS_INLINE base64_wrapper base64(substr s) +{ + return base64_wrapper(blob(s.str, s.len)); +} + +} // namespace fmt + + +/** write a variable in base64 format */ +inline size_t to_chars(substr buf, fmt::const_base64_wrapper b) +{ + return base64_encode(buf, b.data); +} + +/** read a variable in base64 format */ +inline size_t from_chars(csubstr buf, fmt::base64_wrapper *b) +{ + return base64_decode(buf, b->data); +} + +} // namespace c4 + +#endif /* _C4_BASE64_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/base64.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/string.hpp +// https://github.com/biojppm/c4core/src/c4/std/string.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_STRING_HPP_ +#define _C4_STD_STRING_HPP_ + +/** @file string.hpp */ + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//#include "c4/substr.hpp" +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +#endif + +//included above: +//#include + +namespace c4 { + +//----------------------------------------------------------------------------- + +/** get a writeable view to an existing std::string. + * When the string is empty, the returned view will be pointing + * at the character with value '\0', but the size will be zero. + * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at + */ +C4_ALWAYS_INLINE c4::substr to_substr(std::string &s) noexcept +{ + #if C4_CPP < 11 + #error this function will do undefined behavior + #endif + // since c++11 it is legal to call s[s.size()]. + return c4::substr(&s[0], s.size()); +} + +/** get a readonly view to an existing std::string. + * When the string is empty, the returned view will be pointing + * at the character with value '\0', but the size will be zero. 
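+ *
+ * A minimal usage sketch (illustrative):
+ *   std::string s = "hello";
+ *   c4::csubstr v = c4::to_csubstr(s); // v.str == s.data(), v.len == 5
+ *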
+ * @see https://en.cppreference.com/w/cpp/string/basic_string/operator_at + */ +C4_ALWAYS_INLINE c4::csubstr to_csubstr(std::string const& s) noexcept +{ + #if C4_CPP < 11 + #error this function will do undefined behavior + #endif + // since c++11 it is legal to call s[s.size()]. + return c4::csubstr(&s[0], s.size()); +} + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE bool operator== (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) == 0; } +C4_ALWAYS_INLINE bool operator!= (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) != 0; } +C4_ALWAYS_INLINE bool operator>= (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) >= 0; } +C4_ALWAYS_INLINE bool operator> (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) > 0; } +C4_ALWAYS_INLINE bool operator<= (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) <= 0; } +C4_ALWAYS_INLINE bool operator< (c4::csubstr ss, std::string const& s) { return ss.compare(to_csubstr(s)) < 0; } + +C4_ALWAYS_INLINE bool operator== (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) == 0; } +C4_ALWAYS_INLINE bool operator!= (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) != 0; } +C4_ALWAYS_INLINE bool operator>= (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) <= 0; } +C4_ALWAYS_INLINE bool operator> (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) < 0; } +C4_ALWAYS_INLINE bool operator<= (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) >= 0; } +C4_ALWAYS_INLINE bool operator< (std::string const& s, c4::csubstr ss) { return ss.compare(to_csubstr(s)) > 0; } + +//----------------------------------------------------------------------------- + +/** copy an std::string to a writeable string view */ +inline size_t to_chars(c4::substr buf, std::string const& s) +{ + C4_ASSERT(!buf.overlaps(to_csubstr(s))); + size_t len = buf.len < s.size() ? buf.len : s.size(); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len) + { + C4_ASSERT(s.data() != nullptr); + C4_ASSERT(buf.str != nullptr); + memcpy(buf.str, s.data(), len); + } + return s.size(); // return the number of needed chars +} + +/** copy a string view to an existing std::string */ +inline bool from_chars(c4::csubstr buf, std::string * s) +{ + s->resize(buf.len); + C4_ASSERT(!buf.overlaps(to_csubstr(*s))); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len) + { + C4_ASSERT(buf.str != nullptr); + memcpy(&(*s)[0], buf.str, buf.len); + } + return true; +} + +} // namespace c4 + +#endif // _C4_STD_STRING_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/std/string.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/vector.hpp +// https://github.com/biojppm/c4core/src/c4/std/vector.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_VECTOR_HPP_ +#define _C4_STD_VECTOR_HPP_ + +/** @file vector.hpp provides conversion and comparison facilities + * from/between std::vector to c4::substr and c4::csubstr. + * @todo add to_span() and friends + */ + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/substr.hpp +//#include "c4/substr.hpp" +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +#endif + +#include + +namespace c4 { + +//----------------------------------------------------------------------------- + +/** get a substr (writeable string view) of an existing std::vector */ +template +c4::substr to_substr(std::vector &vec) +{ + char *data = vec.empty() ? nullptr : vec.data(); // data() may or may not return a null pointer. + return c4::substr(data, vec.size()); +} + +/** get a csubstr (read-only string) view of an existing std::vector */ +template +c4::csubstr to_csubstr(std::vector const& vec) +{ + const char *data = vec.empty() ? nullptr : vec.data(); // data() may or may not return a null pointer. 
+ return c4::csubstr(data, vec.size()); +} + +//----------------------------------------------------------------------------- +// comparisons between substrings and std::vector + +template C4_ALWAYS_INLINE bool operator!= (c4::csubstr ss, std::vector const& s) { return ss != to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator== (c4::csubstr ss, std::vector const& s) { return ss == to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator>= (c4::csubstr ss, std::vector const& s) { return ss >= to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator> (c4::csubstr ss, std::vector const& s) { return ss > to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator<= (c4::csubstr ss, std::vector const& s) { return ss <= to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator< (c4::csubstr ss, std::vector const& s) { return ss < to_csubstr(s); } + +template C4_ALWAYS_INLINE bool operator!= (std::vector const& s, c4::csubstr ss) { return ss != to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator== (std::vector const& s, c4::csubstr ss) { return ss == to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator>= (std::vector const& s, c4::csubstr ss) { return ss <= to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator> (std::vector const& s, c4::csubstr ss) { return ss < to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator<= (std::vector const& s, c4::csubstr ss) { return ss >= to_csubstr(s); } +template C4_ALWAYS_INLINE bool operator< (std::vector const& s, c4::csubstr ss) { return ss > to_csubstr(s); } + +//----------------------------------------------------------------------------- + +/** copy a std::vector to a writeable string view */ +template +inline size_t to_chars(c4::substr buf, std::vector const& s) +{ + C4_ASSERT(!buf.overlaps(to_csubstr(s))); + size_t len = buf.len < s.size() ? buf.len : s.size(); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. + // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(len > 0) + { + memcpy(buf.str, s.data(), len); + } + return s.size(); // return the number of needed chars +} + +/** copy a string view to an existing std::vector */ +template +inline bool from_chars(c4::csubstr buf, std::vector * s) +{ + s->resize(buf.len); + C4_ASSERT(!buf.overlaps(to_csubstr(*s))); + // calling memcpy with null strings is undefined behavior + // and will wreak havoc in calling code's branches. 
+ // see https://github.com/biojppm/rapidyaml/pull/264#issuecomment-1262133637 + if(buf.len > 0) + { + memcpy(&(*s)[0], buf.str, buf.len); + } + return true; +} + +} // namespace c4 + +#endif // _C4_STD_VECTOR_HPP_ + + +// (end https://github.com/biojppm/c4core/src/c4/std/vector.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/std/tuple.hpp +// https://github.com/biojppm/c4core/src/c4/std/tuple.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_STD_TUPLE_HPP_ +#define _C4_STD_TUPLE_HPP_ + +/** @file tuple.hpp */ + +#ifndef C4CORE_SINGLE_HEADER +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/format.hpp +//#include "c4/format.hpp" +#if !defined(C4_FORMAT_HPP_) && !defined(_C4_FORMAT_HPP_) +#error "amalgamate: file c4/format.hpp must have been included at this point" +#endif /* C4_FORMAT_HPP_ */ + +#endif + +#include + +/** this is a work in progress */ +#undef C4_TUPLE_TO_CHARS + +namespace c4 { + +#ifdef C4_TUPLE_TO_CHARS +namespace detail { + +template< size_t Curr, class... Types > +struct tuple_helper +{ + static size_t do_cat(substr buf, std::tuple< Types... > const& tp) + { + size_t num = to_chars(buf, std::get(tp)); + buf = buf.len >= num ? buf.sub(num) : substr{}; + num += tuple_helper< Curr+1, Types... >::do_cat(buf, tp); + return num; + } + + static size_t do_uncat(csubstr buf, std::tuple< Types... > & tp) + { + size_t num = from_str_trim(buf, &std::get(tp)); + if(num == csubstr::npos) return csubstr::npos; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num += tuple_helper< Curr+1, Types... >::do_uncat(buf, tp); + return num; + } + + template< class Sep > + static size_t do_catsep_more(substr buf, Sep const& sep, std::tuple< Types... > const& tp) + { + size_t ret = to_chars(buf, sep), num = ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = to_chars(buf, std::get(tp)); + num += ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = tuple_helper< Curr+1, Types... >::do_catsep_more(buf, sep, tp); + num += ret; + return num; + } + + template< class Sep > + static size_t do_uncatsep_more(csubstr buf, Sep & sep, std::tuple< Types... > & tp) + { + size_t ret = from_str_trim(buf, &sep), num = ret; + if(ret == csubstr::npos) return csubstr::npos; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = from_str_trim(buf, &std::get(tp)); + if(ret == csubstr::npos) return csubstr::npos; + num += ret; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = tuple_helper< Curr+1, Types... >::do_uncatsep_more(buf, sep, tp); + if(ret == csubstr::npos) return csubstr::npos; + num += ret; + return num; + } + + static size_t do_format(substr buf, csubstr fmt, std::tuple< Types... > const& tp) + { + auto pos = fmt.find("{}"); + if(pos != csubstr::npos) + { + size_t num = to_chars(buf, fmt.sub(0, pos)); + size_t out = num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = to_chars(buf, std::get(tp)); + out += num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = tuple_helper< Curr+1, Types... >::do_format(buf, fmt.sub(pos + 2), tp); + out += num; + return out; + } + else + { + return format(buf, fmt); + } + } + + static size_t do_unformat(csubstr buf, csubstr fmt, std::tuple< Types... 
> & tp) + { + auto pos = fmt.find("{}"); + if(pos != csubstr::npos) + { + size_t num = pos; + size_t out = num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = from_str_trim(buf, &std::get(tp)); + out += num; + buf = buf.len >= num ? buf.sub(num) : substr{}; + num = tuple_helper< Curr+1, Types... >::do_unformat(buf, fmt.sub(pos + 2), tp); + out += num; + return out; + } + else + { + return tuple_helper< sizeof...(Types), Types... >::do_unformat(buf, fmt, tp); + } + } + +}; + +/** @todo VS compilation fails for this class */ +template< class... Types > +struct tuple_helper< sizeof...(Types), Types... > +{ + static size_t do_cat(substr /*buf*/, std::tuple const& /*tp*/) { return 0; } + static size_t do_uncat(csubstr /*buf*/, std::tuple & /*tp*/) { return 0; } + + template< class Sep > static size_t do_catsep_more(substr /*buf*/, Sep const& /*sep*/, std::tuple const& /*tp*/) { return 0; } + template< class Sep > static size_t do_uncatsep_more(csubstr /*buf*/, Sep & /*sep*/, std::tuple & /*tp*/) { return 0; } + + static size_t do_format(substr buf, csubstr fmt, std::tuple const& /*tp*/) + { + return to_chars(buf, fmt); + } + + static size_t do_unformat(csubstr buf, csubstr fmt, std::tuple const& /*tp*/) + { + return 0; + } +}; + +} // namespace detail + +template< class... Types > +inline size_t cat(substr buf, std::tuple< Types... > const& tp) +{ + return detail::tuple_helper< 0, Types... >::do_cat(buf, tp); +} + +template< class... Types > +inline size_t uncat(csubstr buf, std::tuple< Types... > & tp) +{ + return detail::tuple_helper< 0, Types... >::do_uncat(buf, tp); +} + +template< class Sep, class... Types > +inline size_t catsep(substr buf, Sep const& sep, std::tuple< Types... > const& tp) +{ + size_t num = to_chars(buf, std::cref(std::get<0>(tp))); + buf = buf.len >= num ? buf.sub(num) : substr{}; + num += detail::tuple_helper< 1, Types... >::do_catsep_more(buf, sep, tp); + return num; +} + +template< class Sep, class... Types > +inline size_t uncatsep(csubstr buf, Sep & sep, std::tuple< Types... > & tp) +{ + size_t ret = from_str_trim(buf, &std::get<0>(tp)), num = ret; + if(ret == csubstr::npos) return csubstr::npos; + buf = buf.len >= ret ? buf.sub(ret) : substr{}; + ret = detail::tuple_helper< 1, Types... >::do_uncatsep_more(buf, sep, tp); + if(ret == csubstr::npos) return csubstr::npos; + num += ret; + return num; +} + +template< class... Types > +inline size_t format(substr buf, csubstr fmt, std::tuple< Types... > const& tp) +{ + return detail::tuple_helper< 0, Types... >::do_format(buf, fmt, tp); +} + +template< class... Types > +inline size_t unformat(csubstr buf, csubstr fmt, std::tuple< Types... > & tp) +{ + return detail::tuple_helper< 0, Types... >::do_unformat(buf, fmt, tp); +} +#endif // C4_TUPLE_TO_CHARS + +} // namespace c4 + +#endif /* _C4_STD_TUPLE_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/std/tuple.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/ext/rng/rng.hpp +// https://github.com/biojppm/c4core/src/c4/ext/rng/rng.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +/* Copyright (c) 2018 Arvid Gerstmann. + * + * https://arvid.io/2018/07/02/better-cxx-prng/ + * + * This code is licensed under MIT license. 
*/ +#ifndef AG_RANDOM_H +#define AG_RANDOM_H + +//included above: +//#include +#include + + +namespace c4 { +namespace rng { + + +class splitmix +{ +public: + using result_type = uint32_t; + static constexpr result_type (min)() { return 0; } + static constexpr result_type (max)() { return UINT32_MAX; } + friend bool operator==(splitmix const &, splitmix const &); + friend bool operator!=(splitmix const &, splitmix const &); + + splitmix() : m_seed(1) {} + explicit splitmix(uint64_t s) : m_seed(s) {} + explicit splitmix(std::random_device &rd) + { + seed(rd); + } + + void seed(uint64_t s) { m_seed = s; } + void seed(std::random_device &rd) + { + m_seed = uint64_t(rd()) << 31 | uint64_t(rd()); + } + + result_type operator()() + { + uint64_t z = (m_seed += UINT64_C(0x9E3779B97F4A7C15)); + z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); + z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); + return result_type((z ^ (z >> 31)) >> 31); + } + + void discard(unsigned long long n) + { + for (unsigned long long i = 0; i < n; ++i) + operator()(); + } + +private: + uint64_t m_seed; +}; + +inline bool operator==(splitmix const &lhs, splitmix const &rhs) +{ + return lhs.m_seed == rhs.m_seed; +} +inline bool operator!=(splitmix const &lhs, splitmix const &rhs) +{ + return lhs.m_seed != rhs.m_seed; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +class xorshift +{ +public: + using result_type = uint32_t; + static constexpr result_type (min)() { return 0; } + static constexpr result_type (max)() { return UINT32_MAX; } + friend bool operator==(xorshift const &, xorshift const &); + friend bool operator!=(xorshift const &, xorshift const &); + + xorshift() : m_seed(0xc1f651c67c62c6e0ull) {} + explicit xorshift(std::random_device &rd) + { + seed(rd); + } + + void seed(uint64_t s) { m_seed = s; } + void seed(std::random_device &rd) + { + m_seed = uint64_t(rd()) << 31 | uint64_t(rd()); + } + + result_type operator()() + { + uint64_t result = m_seed * 0xd989bcacc137dcd5ull; + m_seed ^= m_seed >> 11; + m_seed ^= m_seed << 31; + m_seed ^= m_seed >> 18; + return uint32_t(result >> 32ull); + } + + void discard(unsigned long long n) + { + for (unsigned long long i = 0; i < n; ++i) + operator()(); + } + +private: + uint64_t m_seed; +}; + +inline bool operator==(xorshift const &lhs, xorshift const &rhs) +{ + return lhs.m_seed == rhs.m_seed; +} +inline bool operator!=(xorshift const &lhs, xorshift const &rhs) +{ + return lhs.m_seed != rhs.m_seed; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +class pcg +{ +public: + using result_type = uint32_t; + static constexpr result_type (min)() { return 0; } + static constexpr result_type (max)() { return UINT32_MAX; } + friend bool operator==(pcg const &, pcg const &); + friend bool operator!=(pcg const &, pcg const &); + + pcg() + : m_state(0x853c49e6748fea9bULL) + , m_inc(0xda3e39cb94b95bdbULL) + {} + explicit pcg(uint64_t s) { m_state = s; m_inc = m_state << 1; } + explicit pcg(std::random_device &rd) + { + seed(rd); + } + + void seed(uint64_t s) { m_state = s; } + void seed(std::random_device &rd) + { + uint64_t s0 = uint64_t(rd()) << 31 | uint64_t(rd()); + uint64_t s1 
= uint64_t(rd()) << 31 | uint64_t(rd()); + + m_state = 0; + m_inc = (s1 << 1) | 1; + (void)operator()(); + m_state += s0; + (void)operator()(); + } + + result_type operator()() + { + uint64_t oldstate = m_state; + m_state = oldstate * 6364136223846793005ULL + m_inc; + uint32_t xorshifted = uint32_t(((oldstate >> 18u) ^ oldstate) >> 27u); + //int rot = oldstate >> 59u; // the original. error? + int64_t rot = (int64_t)oldstate >> 59u; // error? + return (xorshifted >> rot) | (xorshifted << ((uint64_t)(-rot) & 31)); + } + + void discard(unsigned long long n) + { + for (unsigned long long i = 0; i < n; ++i) + operator()(); + } + +private: + uint64_t m_state; + uint64_t m_inc; +}; + +inline bool operator==(pcg const &lhs, pcg const &rhs) +{ + return lhs.m_state == rhs.m_state + && lhs.m_inc == rhs.m_inc; +} +inline bool operator!=(pcg const &lhs, pcg const &rhs) +{ + return lhs.m_state != rhs.m_state + || lhs.m_inc != rhs.m_inc; +} + +} // namespace rng +} // namespace c4 + +#endif /* AG_RANDOM_H */ + + +// (end https://github.com/biojppm/c4core/src/c4/ext/rng/rng.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/ext/sg14/inplace_function.h +// https://github.com/biojppm/c4core/src/c4/ext/sg14/inplace_function.h +//-------------------------------------------------------------------------------- +//******************************************************************************** + +/* + * Boost Software License - Version 1.0 - August 17th, 2003 + * + * Permission is hereby granted, free of charge, to any person or organization + * obtaining a copy of the software and accompanying documentation covered by + * this license (the "Software") to use, reproduce, display, distribute, + * execute, and transmit the Software, and to prepare derivative works of the + * Software, and to permit third-parties to whom the Software is furnished to + * do so, all subject to the following: + * + * The copyright notices in the Software and this entire statement, including + * the above license grant, this restriction and the following disclaimer, + * must be included in all copies of the Software, in whole or in part, and + * all derivative works of the Software, unless such copies or derivative + * works are solely in the form of machine-executable object code generated by + * a source language processor. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT + * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE + * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _C4_EXT_SG14_INPLACE_FUNCTION_H_ +#define _C4_EXT_SG14_INPLACE_FUNCTION_H_ + +//included above: +//#include +//included above: +//#include +#include + +namespace stdext { + +namespace inplace_function_detail { + +static constexpr size_t InplaceFunctionDefaultCapacity = 32; + +#if defined(__GLIBCXX__) // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61458 +template +union aligned_storage_helper { + struct double1 { double a; }; + struct double4 { double a[4]; }; + template using maybe = typename std::conditional<(Cap >= sizeof(T)), T, char>::type; + char real_data[Cap]; + maybe a; + maybe b; + maybe c; + maybe d; + maybe e; + maybe f; + maybe g; + maybe h; +}; + +template>::value> +struct aligned_storage { + using type = typename std::aligned_storage::type; +}; +#else +using std::aligned_storage; +#endif + +template struct wrapper +{ + using type = T; +}; + +template struct vtable +{ + using storage_ptr_t = void*; + + using invoke_ptr_t = R(*)(storage_ptr_t, Args&&...); + using process_ptr_t = void(*)(storage_ptr_t, storage_ptr_t); + using destructor_ptr_t = void(*)(storage_ptr_t); + + const invoke_ptr_t invoke_ptr; + const process_ptr_t copy_ptr; + const process_ptr_t move_ptr; + const destructor_ptr_t destructor_ptr; + + explicit constexpr vtable() noexcept : + invoke_ptr{ [](storage_ptr_t, Args&&...) -> R + { throw std::bad_function_call(); } + }, + copy_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} }, + move_ptr{ [](storage_ptr_t, storage_ptr_t) noexcept -> void {} }, + destructor_ptr{ [](storage_ptr_t) noexcept -> void {} } + {} + + template explicit constexpr vtable(wrapper) noexcept : + invoke_ptr{ [](storage_ptr_t storage_ptr, Args&&... args) + noexcept(noexcept(std::declval()(args...))) -> R + { return (*static_cast(storage_ptr))( + std::forward(args)... + ); } + }, + copy_ptr{ [](storage_ptr_t dst_ptr, storage_ptr_t src_ptr) + noexcept(std::is_nothrow_copy_constructible::value) -> void + { new (dst_ptr) C{ (*static_cast(src_ptr)) }; } + }, + move_ptr{ [](storage_ptr_t dst_ptr, storage_ptr_t src_ptr) + noexcept(std::is_nothrow_move_constructible::value) -> void + { new (dst_ptr) C{ std::move(*static_cast(src_ptr)) }; } + }, + destructor_ptr{ [](storage_ptr_t storage_ptr) + noexcept -> void + { static_cast(storage_ptr)->~C(); } + } + {} + + vtable(const vtable&) = delete; + vtable(vtable&&) = delete; + + vtable& operator= (const vtable&) = delete; + vtable& operator= (vtable&&) = delete; + + ~vtable() = default; +}; + +template +struct is_valid_inplace_dst : std::true_type +{ + static_assert(DstCap >= SrcCap, + "Can't squeeze larger inplace_function into a smaller one" + ); + + static_assert(DstAlign % SrcAlign == 0, + "Incompatible inplace_function alignments" + ); +}; + +} // namespace inplace_function_detail + +template< + typename Signature, + size_t Capacity = inplace_function_detail::InplaceFunctionDefaultCapacity, + size_t Alignment = std::alignment_of::type>::value +> +class inplace_function; // unspecified + +template< + typename R, + typename... 
Args, + size_t Capacity, + size_t Alignment +> +class inplace_function +{ + static const constexpr inplace_function_detail::vtable empty_vtable{}; +public: + using capacity = std::integral_constant; + using alignment = std::integral_constant; + + using storage_t = typename inplace_function_detail::aligned_storage::type; + using vtable_t = inplace_function_detail::vtable; + using vtable_ptr_t = const vtable_t*; + + template friend class inplace_function; + + inplace_function() noexcept : + vtable_ptr_{std::addressof(empty_vtable)} + {} + + template< + typename T, + typename C = typename std::decay::type, + typename = typename std::enable_if< + !(std::is_same::value + || std::is_convertible::value) + >::type + > + inplace_function(T&& closure) + { +#if __cplusplus >= 201703L + static_assert(std::is_invocable_r::value, + "inplace_function cannot be constructed from non-callable type" + ); +#endif + static_assert(std::is_copy_constructible::value, + "inplace_function cannot be constructed from non-copyable type" + ); + + static_assert(sizeof(C) <= Capacity, + "inplace_function cannot be constructed from object with this (large) size" + ); + + static_assert(Alignment % std::alignment_of::value == 0, + "inplace_function cannot be constructed from object with this (large) alignment" + ); + + static const vtable_t vt{inplace_function_detail::wrapper{}}; + vtable_ptr_ = std::addressof(vt); + + new (std::addressof(storage_)) C{std::forward(closure)}; + } + + inplace_function(std::nullptr_t) noexcept : + vtable_ptr_{std::addressof(empty_vtable)} + {} + + inplace_function(const inplace_function& other) : + vtable_ptr_{other.vtable_ptr_} + { + vtable_ptr_->copy_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + + inplace_function(inplace_function&& other) : + vtable_ptr_{other.vtable_ptr_} + { + vtable_ptr_->move_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + + inplace_function& operator= (std::nullptr_t) noexcept + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + vtable_ptr_ = std::addressof(empty_vtable); + return *this; + } + + inplace_function& operator= (const inplace_function& other) + { + if(this != std::addressof(other)) + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + + vtable_ptr_ = other.vtable_ptr_; + vtable_ptr_->copy_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + return *this; + } + + inplace_function& operator= (inplace_function&& other) + { + if(this != std::addressof(other)) + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + + vtable_ptr_ = other.vtable_ptr_; + vtable_ptr_->move_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + } + return *this; + } + + ~inplace_function() + { + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + } + + R operator() (Args... args) const + { + return vtable_ptr_->invoke_ptr( + std::addressof(storage_), + std::forward(args)... 
+ ); + } + + constexpr bool operator== (std::nullptr_t) const noexcept + { + return !operator bool(); + } + + constexpr bool operator!= (std::nullptr_t) const noexcept + { + return operator bool(); + } + + explicit constexpr operator bool() const noexcept + { + return vtable_ptr_ != std::addressof(empty_vtable); + } + + template + operator inplace_function() const& + { + static_assert(inplace_function_detail::is_valid_inplace_dst< + Cap, Align, Capacity, Alignment + >::value, "conversion not allowed"); + + return {vtable_ptr_, vtable_ptr_->copy_ptr, std::addressof(storage_)}; + } + + template + operator inplace_function() && + { + static_assert(inplace_function_detail::is_valid_inplace_dst< + Cap, Align, Capacity, Alignment + >::value, "conversion not allowed"); + + return {vtable_ptr_, vtable_ptr_->move_ptr, std::addressof(storage_)}; + } + + void swap(inplace_function& other) + { + if (this == std::addressof(other)) return; + + storage_t tmp; + vtable_ptr_->move_ptr( + std::addressof(tmp), + std::addressof(storage_) + ); + vtable_ptr_->destructor_ptr(std::addressof(storage_)); + + other.vtable_ptr_->move_ptr( + std::addressof(storage_), + std::addressof(other.storage_) + ); + other.vtable_ptr_->destructor_ptr(std::addressof(other.storage_)); + + vtable_ptr_->move_ptr( + std::addressof(other.storage_), + std::addressof(tmp) + ); + vtable_ptr_->destructor_ptr(std::addressof(tmp)); + + std::swap(vtable_ptr_, other.vtable_ptr_); + } + +private: + vtable_ptr_t vtable_ptr_; + mutable storage_t storage_; + + inplace_function( + vtable_ptr_t vtable_ptr, + typename vtable_t::process_ptr_t process_ptr, + typename vtable_t::storage_ptr_t storage_ptr + ) : vtable_ptr_{vtable_ptr} + { + process_ptr(std::addressof(storage_), storage_ptr); + } +}; + +} // namespace stdext + +#endif /* _C4_EXT_SG14_INPLACE_FUNCTION_H_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/ext/sg14/inplace_function.h) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/language.cpp +// https://github.com/biojppm/c4core/src/c4/language.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/language.hpp +//#include "c4/language.hpp" +#if !defined(C4_LANGUAGE_HPP_) && !defined(_C4_LANGUAGE_HPP_) +#error "amalgamate: file c4/language.hpp must have been included at this point" +#endif /* C4_LANGUAGE_HPP_ */ + + +namespace c4 { +namespace detail { + +#ifndef __GNUC__ +void use_char_pointer(char const volatile* v) +{ + C4_UNUSED(v); +} +#else +void foo() {} // to avoid empty file warning from the linker +#endif + +} // namespace detail +} // namespace c4 + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/language.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/format.cpp +// https://github.com/biojppm/c4core/src/c4/format.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// 
https://github.com/biojppm/c4core/src/c4/format.hpp +//#include "c4/format.hpp" +#if !defined(C4_FORMAT_HPP_) && !defined(_C4_FORMAT_HPP_) +#error "amalgamate: file c4/format.hpp must have been included at this point" +#endif /* C4_FORMAT_HPP_ */ + + +//included above: +//#include // for std::align + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + +namespace c4 { + + +size_t to_chars(substr buf, fmt::const_raw_wrapper r) +{ + void * vptr = buf.str; + size_t space = buf.len; + auto ptr = (decltype(buf.str)) std::align(r.alignment, r.len, vptr, space); + if(ptr == nullptr) + { + // if it was not possible to align, return a conservative estimate + // of the required space + return r.alignment + r.len; + } + C4_CHECK(ptr >= buf.begin() && ptr <= buf.end()); + size_t sz = static_cast(ptr - buf.str) + r.len; + if(sz <= buf.len) + { + memcpy(ptr, r.buf, r.len); + } + return sz; +} + + +bool from_chars(csubstr buf, fmt::raw_wrapper *r) +{ + void * vptr = (void*)buf.str; + size_t space = buf.len; + auto ptr = (decltype(buf.str)) std::align(r->alignment, r->len, vptr, space); + C4_CHECK(ptr != nullptr); + C4_CHECK(ptr >= buf.begin() && ptr <= buf.end()); + //size_t dim = (ptr - buf.str) + r->len; + memcpy(r->buf, ptr, r->len); + return true; +} + + +} // namespace c4 + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/format.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/memory_util.cpp +// https://github.com/biojppm/c4core/src/c4/memory_util.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_util.hpp +//#include "c4/memory_util.hpp" +#if !defined(C4_MEMORY_UTIL_HPP_) && !defined(_C4_MEMORY_UTIL_HPP_) +#error "amalgamate: file c4/memory_util.hpp must have been included at this point" +#endif /* C4_MEMORY_UTIL_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +namespace c4 { + +/** Fills 'dest' with the first 'pattern_size' bytes at 'pattern', 'num_times'. */ +void mem_repeat(void* dest, void const* pattern, size_t pattern_size, size_t num_times) +{ + if(C4_UNLIKELY(num_times == 0)) + return; + C4_ASSERT( ! 
mem_overlaps(dest, pattern, num_times*pattern_size, pattern_size)); + char *begin = (char*)dest; + char *end = begin + num_times * pattern_size; + // copy the pattern once + ::memcpy(begin, pattern, pattern_size); + // now copy from dest to itself, doubling up every time + size_t n = pattern_size; + while(begin + 2*n < end) + { + ::memcpy(begin + n, begin, n); + n <<= 1; // double n + } + // copy the missing part + if(begin + n < end) + { + ::memcpy(begin + n, begin, static_cast(end - (begin + n))); + } +} + +} // namespace c4 + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/memory_util.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/char_traits.cpp +// https://github.com/biojppm/c4core/src/c4/char_traits.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/char_traits.hpp +//#include "c4/char_traits.hpp" +#if !defined(C4_CHAR_TRAITS_HPP_) && !defined(_C4_CHAR_TRAITS_HPP_) +#error "amalgamate: file c4/char_traits.hpp must have been included at this point" +#endif /* C4_CHAR_TRAITS_HPP_ */ + + +namespace c4 { + +constexpr const char char_traits< char >::whitespace_chars[]; +constexpr const size_t char_traits< char >::num_whitespace_chars; +constexpr const wchar_t char_traits< wchar_t >::whitespace_chars[]; +constexpr const size_t char_traits< wchar_t >::num_whitespace_chars; + +} // namespace c4 + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/char_traits.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/memory_resource.cpp +// https://github.com/biojppm/c4core/src/c4/memory_resource.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_resource.hpp +//#include "c4/memory_resource.hpp" +#if !defined(C4_MEMORY_RESOURCE_HPP_) && !defined(_C4_MEMORY_RESOURCE_HPP_) +#error "amalgamate: file c4/memory_resource.hpp must have been included at this point" +#endif /* C4_MEMORY_RESOURCE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/memory_util.hpp +//#include "c4/memory_util.hpp" +#if !defined(C4_MEMORY_UTIL_HPP_) && !defined(_C4_MEMORY_UTIL_HPP_) +#error "amalgamate: file c4/memory_util.hpp must have been included at this point" +#endif /* C4_MEMORY_UTIL_HPP_ */ + + +//included above: +//#include +//included above: +//#include +#if defined(C4_POSIX) || defined(C4_IOS) || defined(C4_MACOS) || defined(C4_ARM) +# include +#endif +#if defined(C4_ARM) +# include +#endif + +//included above: +//#include + +namespace c4 { + +namespace detail { + + +#ifdef C4_NO_ALLOC_DEFAULTS +aalloc_pfn s_aalloc = nullptr; +free_pfn s_afree = nullptr; +arealloc_pfn s_arealloc = nullptr; +#else + + +void afree_impl(void *ptr) +{ +#if defined(C4_WIN) || defined(C4_XBOX) + ::_aligned_free(ptr); +#else + ::free(ptr); +#endif +} + + 
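+// The aligned allocation entry points below are reached through the
+// pluggable function pointers s_aalloc / s_afree / s_arealloc. A minimal
+// usage sketch (illustrative), using the public wrappers defined further
+// down in this file:
+//
+//   void *p = c4::aalloc(1024, 64);       // 1024 bytes, 64-byte aligned
+//   p = c4::arealloc(p, 1024, 4096, 64);  // grow, preserving the alignment
+//   c4::afree(p);
+//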
+void* aalloc_impl(size_t size, size_t alignment) +{ + void *mem; +#if defined(C4_WIN) || defined(C4_XBOX) + mem = ::_aligned_malloc(size, alignment); + C4_CHECK(mem != nullptr || size == 0); +#elif defined(C4_ARM) + // https://stackoverflow.com/questions/53614538/undefined-reference-to-posix-memalign-in-arm-gcc + // https://electronics.stackexchange.com/questions/467382/e2-studio-undefined-reference-to-posix-memalign/467753 + mem = memalign(alignment, size); + C4_CHECK(mem != nullptr || size == 0); +#elif defined(C4_POSIX) || defined(C4_IOS) || defined(C4_MACOS) + // NOTE: alignment needs to be sized in multiples of sizeof(void*) + size_t amult = alignment; + if(C4_UNLIKELY(alignment < sizeof(void*))) + { + amult = sizeof(void*); + } + int ret = ::posix_memalign(&mem, amult, size); + if(C4_UNLIKELY(ret)) + { + if(ret == EINVAL) + { + C4_ERROR("The alignment argument %zu was not a power of two, " + "or was not a multiple of sizeof(void*)", alignment); + } + else if(ret == ENOMEM) + { + C4_ERROR("There was insufficient memory to fulfill the " + "allocation request of %zu bytes (alignment=%lu)", size, size); + } + return nullptr; + } +#else + C4_NOT_IMPLEMENTED_MSG("need to implement an aligned allocation for this platform"); +#endif + C4_ASSERT_MSG((uintptr_t(mem) & (alignment-1)) == 0, "address %p is not aligned to %zu boundary", mem, alignment); + return mem; +} + + +void* arealloc_impl(void* ptr, size_t oldsz, size_t newsz, size_t alignment) +{ + /** @todo make this more efficient + * @see https://stackoverflow.com/questions/9078259/does-realloc-keep-the-memory-alignment-of-posix-memalign + * @see look for qReallocAligned() in http://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qmalloc.cpp + */ + void *tmp = aalloc(newsz, alignment); + size_t min = newsz < oldsz ? 
newsz : oldsz; + if(mem_overlaps(ptr, tmp, oldsz, newsz)) + { + ::memmove(tmp, ptr, min); + } + else + { + ::memcpy(tmp, ptr, min); + } + afree(ptr); + return tmp; +} + +aalloc_pfn s_aalloc = aalloc_impl; +afree_pfn s_afree = afree_impl; +arealloc_pfn s_arealloc = arealloc_impl; + +#endif // C4_NO_ALLOC_DEFAULTS + +} // namespace detail + + +aalloc_pfn get_aalloc() +{ + return detail::s_aalloc; +} +void set_aalloc(aalloc_pfn fn) +{ + detail::s_aalloc = fn; +} + +afree_pfn get_afree() +{ + return detail::s_afree; +} +void set_afree(afree_pfn fn) +{ + detail::s_afree = fn; +} + +arealloc_pfn get_arealloc() +{ + return detail::s_arealloc; +} +void set_arealloc(arealloc_pfn fn) +{ + detail::s_arealloc = fn; +} + + +void* aalloc(size_t sz, size_t alignment) +{ + C4_ASSERT_MSG(c4::get_aalloc() != nullptr, "did you forget to call set_aalloc()?"); + auto fn = c4::get_aalloc(); + void* ptr = fn(sz, alignment); + return ptr; +} + +void afree(void* ptr) +{ + C4_ASSERT_MSG(c4::get_afree() != nullptr, "did you forget to call set_afree()?"); + auto fn = c4::get_afree(); + fn(ptr); +} + +void* arealloc(void *ptr, size_t oldsz, size_t newsz, size_t alignment) +{ + C4_ASSERT_MSG(c4::get_arealloc() != nullptr, "did you forget to call set_arealloc()?"); + auto fn = c4::get_arealloc(); + void* nptr = fn(ptr, oldsz, newsz, alignment); + return nptr; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void detail::_MemoryResourceSingleChunk::release() +{ + if(m_mem && m_owner) + { + impl_type::deallocate(m_mem, m_size); + } + m_mem = nullptr; + m_size = 0; + m_owner = false; + m_pos = 0; +} + +void detail::_MemoryResourceSingleChunk::acquire(size_t sz) +{ + clear(); + m_owner = true; + m_mem = (char*) impl_type::allocate(sz, alignof(max_align_t)); + m_size = sz; + m_pos = 0; +} + +void detail::_MemoryResourceSingleChunk::acquire(void *mem, size_t sz) +{ + clear(); + m_owner = false; + m_mem = (char*) mem; + m_size = sz; + m_pos = 0; +} + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void* MemoryResourceLinear::do_allocate(size_t sz, size_t alignment, void *hint) +{ + C4_UNUSED(hint); + if(sz == 0) return nullptr; + // make sure there's enough room to allocate + if(m_pos + sz > m_size) + { + C4_ERROR("out of memory"); + return nullptr; + } + void *mem = m_mem + m_pos; + size_t space = m_size - m_pos; + if(std::align(alignment, sz, mem, space)) + { + C4_ASSERT(m_pos <= m_size); + C4_ASSERT(m_size - m_pos >= space); + m_pos += (m_size - m_pos) - space; + m_pos += sz; + C4_ASSERT(m_pos <= m_size); + } + else + { + C4_ERROR("could not align memory"); + mem = nullptr; + } + return mem; +} + +void MemoryResourceLinear::do_deallocate(void* ptr, size_t sz, size_t alignment) +{ + C4_UNUSED(ptr); + C4_UNUSED(sz); + C4_UNUSED(alignment); + // nothing to do!! +} + +void* MemoryResourceLinear::do_reallocate(void* ptr, size_t oldsz, size_t newsz, size_t alignment) +{ + if(newsz == oldsz) return ptr; + // is ptr the most recently allocated (MRA) block? 
+ char *cptr = (char*)ptr; + bool same_pos = (m_mem + m_pos == cptr + oldsz); + // no need to get more memory when shrinking + if(newsz < oldsz) + { + // if this is the MRA, we can safely shrink the position + if(same_pos) + { + m_pos -= oldsz - newsz; + } + return ptr; + } + // we're growing the block, and it fits in size + else if(same_pos && cptr + newsz <= m_mem + m_size) + { + // if this is the MRA, we can safely grow the position + m_pos += newsz - oldsz; + return ptr; + } + // we're growing the block or it doesn't fit - + // delegate any of these situations to do_allocate() + return do_allocate(newsz, alignment, ptr); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @todo add a free list allocator. A good candidate because of its + * small size is TLSF. + * + * @see https://github.com/mattconte/tlsf + * + * Comparisons: + * + * @see https://www.researchgate.net/publication/262375150_A_Comparative_Study_on_Memory_Allocators_in_Multicore_and_Multithreaded_Applications_-_SBESC_2011_-_Presentation_Slides + * @see http://webkit.sed.hu/blog/20100324/war-allocators-tlsf-action + * @see https://github.com/emeryberger/Malloc-Implementations/tree/master/allocators + * + * */ + +} // namespace c4 + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +#ifdef C4_REDEFINE_CPPNEW +#include <new> +void* operator new(size_t size) +{ + auto *mr = ::c4::get_memory_resource(); + return mr->allocate(size); +} +void operator delete(void *p) noexcept +{ + C4_NEVER_REACH(); +} +void operator delete(void *p, size_t size) +{ + auto *mr = ::c4::get_memory_resource(); + mr->deallocate(p, size); +} +void* operator new[](size_t size) +{ + return operator new(size); +} +void operator delete[](void *p) noexcept +{ + operator delete(p); +} +void operator delete[](void *p, size_t size) +{ + operator delete(p, size); +} +void* operator new(size_t size, std::nothrow_t) +{ + return operator new(size); +} +void operator delete(void *p, std::nothrow_t) +{ + operator delete(p); +} +void operator delete(void *p, size_t size, std::nothrow_t) +{ + operator delete(p, size); +} +void* operator new[](size_t size, std::nothrow_t) +{ + return operator new(size); +} +void operator delete[](void *p, std::nothrow_t) +{ + operator delete(p); +} +void operator delete[](void *p, size_t size, std::nothrow_t) +{ + operator delete(p, size); +} +#endif // C4_REDEFINE_CPPNEW + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/memory_resource.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/utf.cpp +// https://github.com/biojppm/c4core/src/c4/utf.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/utf.hpp +//#include "c4/utf.hpp" +#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_) +#error "amalgamate: file c4/utf.hpp must have been 
included at this point" +#endif /* C4_UTF_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/charconv.hpp +//#include "c4/charconv.hpp" +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + + +namespace c4 { + +size_t decode_code_point(uint8_t *C4_RESTRICT buf, size_t buflen, const uint32_t code) +{ + C4_UNUSED(buflen); + C4_ASSERT(buflen >= 4); + if (code <= UINT32_C(0x7f)) + { + buf[0] = (uint8_t)code; + return 1u; + } + else if(code <= UINT32_C(0x7ff)) + { + buf[0] = (uint8_t)(UINT32_C(0xc0) | (code >> 6)); /* 110xxxxx */ + buf[1] = (uint8_t)(UINT32_C(0x80) | (code & UINT32_C(0x3f))); /* 10xxxxxx */ + return 2u; + } + else if(code <= UINT32_C(0xffff)) + { + buf[0] = (uint8_t)(UINT32_C(0xe0) | ((code >> 12))); /* 1110xxxx */ + buf[1] = (uint8_t)(UINT32_C(0x80) | ((code >> 6) & UINT32_C(0x3f))); /* 10xxxxxx */ + buf[2] = (uint8_t)(UINT32_C(0x80) | ((code ) & UINT32_C(0x3f))); /* 10xxxxxx */ + return 3u; + } + else if(code <= UINT32_C(0x10ffff)) + { + buf[0] = (uint8_t)(UINT32_C(0xf0) | ((code >> 18))); /* 11110xxx */ + buf[1] = (uint8_t)(UINT32_C(0x80) | ((code >> 12) & UINT32_C(0x3f))); /* 10xxxxxx */ + buf[2] = (uint8_t)(UINT32_C(0x80) | ((code >> 6) & UINT32_C(0x3f))); /* 10xxxxxx */ + buf[3] = (uint8_t)(UINT32_C(0x80) | ((code ) & UINT32_C(0x3f))); /* 10xxxxxx */ + return 4u; + } + return 0; +} + +substr decode_code_point(substr out, csubstr code_point) +{ + C4_ASSERT(out.len >= 4); + C4_ASSERT(!code_point.begins_with("U+")); + C4_ASSERT(!code_point.begins_with("\\x")); + C4_ASSERT(!code_point.begins_with("\\u")); + C4_ASSERT(!code_point.begins_with("\\U")); + C4_ASSERT(!code_point.begins_with('0')); + C4_ASSERT(code_point.len <= 8); + C4_ASSERT(code_point.len > 0); + uint32_t code_point_val; + C4_CHECK(read_hex(code_point, &code_point_val)); + size_t ret = decode_code_point((uint8_t*)out.str, out.len, code_point_val); + C4_ASSERT(ret <= 4); + return out.first(ret); +} + +} // namespace c4 + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/utf.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/base64.cpp +// https://github.com/biojppm/c4core/src/c4/base64.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/base64.hpp +//#include "c4/base64.hpp" +#if !defined(C4_BASE64_HPP_) && !defined(_C4_BASE64_HPP_) +#error "amalgamate: file c4/base64.hpp must have been included at this point" +#endif /* C4_BASE64_HPP_ */ + + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wchar-subscripts" // array subscript is of type 'char' +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wchar-subscripts" +# pragma GCC diagnostic ignored "-Wtype-limits" +#endif + +namespace c4 { + +namespace detail { + +constexpr static const char base64_sextet_to_char_[64] = { + /* 0/ 65*/ 'A', /* 1/ 66*/ 'B', /* 2/ 67*/ 'C', /* 3/ 68*/ 'D', + /* 4/ 69*/ 'E', /* 5/ 70*/ 'F', /* 6/ 71*/ 'G', /* 7/ 72*/ 'H', + /* 8/ 73*/ 'I', /* 9/ 74*/ 'J', /*10/ 75*/ 'K', /*11/ 74*/ 'L', 
+ /*12/ 77*/ 'M', /*13/ 78*/ 'N', /*14/ 79*/ 'O', /*15/ 78*/ 'P', + /*16/ 81*/ 'Q', /*17/ 82*/ 'R', /*18/ 83*/ 'S', /*19/ 82*/ 'T', + /*20/ 85*/ 'U', /*21/ 86*/ 'V', /*22/ 87*/ 'W', /*23/ 88*/ 'X', + /*24/ 89*/ 'Y', /*25/ 90*/ 'Z', /*26/ 97*/ 'a', /*27/ 98*/ 'b', + /*28/ 99*/ 'c', /*29/100*/ 'd', /*30/101*/ 'e', /*31/102*/ 'f', + /*32/103*/ 'g', /*33/104*/ 'h', /*34/105*/ 'i', /*35/106*/ 'j', + /*36/107*/ 'k', /*37/108*/ 'l', /*38/109*/ 'm', /*39/110*/ 'n', + /*40/111*/ 'o', /*41/112*/ 'p', /*42/113*/ 'q', /*43/114*/ 'r', + /*44/115*/ 's', /*45/116*/ 't', /*46/117*/ 'u', /*47/118*/ 'v', + /*48/119*/ 'w', /*49/120*/ 'x', /*50/121*/ 'y', /*51/122*/ 'z', + /*52/ 48*/ '0', /*53/ 49*/ '1', /*54/ 50*/ '2', /*55/ 51*/ '3', + /*56/ 52*/ '4', /*57/ 53*/ '5', /*58/ 54*/ '6', /*59/ 55*/ '7', + /*60/ 56*/ '8', /*61/ 57*/ '9', /*62/ 43*/ '+', /*63/ 47*/ '/', +}; + +// https://www.cs.cmu.edu/~pattis/15-1XX/common/handouts/ascii.html +constexpr static const char base64_char_to_sextet_[128] = { + #define __ char(-1) // undefined below + /* 0 NUL*/ __, /* 1 SOH*/ __, /* 2 STX*/ __, /* 3 ETX*/ __, + /* 4 EOT*/ __, /* 5 ENQ*/ __, /* 6 ACK*/ __, /* 7 BEL*/ __, + /* 8 BS */ __, /* 9 TAB*/ __, /* 10 LF */ __, /* 11 VT */ __, + /* 12 FF */ __, /* 13 CR */ __, /* 14 SO */ __, /* 15 SI */ __, + /* 16 DLE*/ __, /* 17 DC1*/ __, /* 18 DC2*/ __, /* 19 DC3*/ __, + /* 20 DC4*/ __, /* 21 NAK*/ __, /* 22 SYN*/ __, /* 23 ETB*/ __, + /* 24 CAN*/ __, /* 25 EM */ __, /* 26 SUB*/ __, /* 27 ESC*/ __, + /* 28 FS */ __, /* 29 GS */ __, /* 30 RS */ __, /* 31 US */ __, + /* 32 SPC*/ __, /* 33 ! */ __, /* 34 " */ __, /* 35 # */ __, + /* 36 $ */ __, /* 37 % */ __, /* 38 & */ __, /* 39 ' */ __, + /* 40 ( */ __, /* 41 ) */ __, /* 42 * */ __, /* 43 + */ 62, + /* 44 , */ __, /* 45 - */ __, /* 46 . */ __, /* 47 / */ 63, + /* 48 0 */ 52, /* 49 1 */ 53, /* 50 2 */ 54, /* 51 3 */ 55, + /* 52 4 */ 56, /* 53 5 */ 57, /* 54 6 */ 58, /* 55 7 */ 59, + /* 56 8 */ 60, /* 57 9 */ 61, /* 58 : */ __, /* 59 ; */ __, + /* 60 < */ __, /* 61 = */ __, /* 62 > */ __, /* 63 ? 
*/ __, + /* 64 @ */ __, /* 65 A */ 0, /* 66 B */ 1, /* 67 C */ 2, + /* 68 D */ 3, /* 69 E */ 4, /* 70 F */ 5, /* 71 G */ 6, + /* 72 H */ 7, /* 73 I */ 8, /* 74 J */ 9, /* 75 K */ 10, + /* 76 L */ 11, /* 77 M */ 12, /* 78 N */ 13, /* 79 O */ 14, + /* 80 P */ 15, /* 81 Q */ 16, /* 82 R */ 17, /* 83 S */ 18, + /* 84 T */ 19, /* 85 U */ 20, /* 86 V */ 21, /* 87 W */ 22, + /* 88 X */ 23, /* 89 Y */ 24, /* 90 Z */ 25, /* 91 [ */ __, + /* 92 \ */ __, /* 93 ] */ __, /* 94 ^ */ __, /* 95 _ */ __, + /* 96 ` */ __, /* 97 a */ 26, /* 98 b */ 27, /* 99 c */ 28, + /*100 d */ 29, /*101 e */ 30, /*102 f */ 31, /*103 g */ 32, + /*104 h */ 33, /*105 i */ 34, /*106 j */ 35, /*107 k */ 36, + /*108 l */ 37, /*109 m */ 38, /*110 n */ 39, /*111 o */ 40, + /*112 p */ 41, /*113 q */ 42, /*114 r */ 43, /*115 s */ 44, + /*116 t */ 45, /*117 u */ 46, /*118 v */ 47, /*119 w */ 48, + /*120 x */ 49, /*121 y */ 50, /*122 z */ 51, /*123 { */ __, + /*124 | */ __, /*125 } */ __, /*126 ~ */ __, /*127 DEL*/ __, + #undef __ +}; + +#ifndef NDEBUG +void base64_test_tables() +{ + for(size_t i = 0; i < C4_COUNTOF(detail::base64_sextet_to_char_); ++i) + { + char s2c = base64_sextet_to_char_[i]; + char c2s = base64_char_to_sextet_[(int)s2c]; + C4_CHECK((size_t)c2s == i); + } + for(size_t i = 0; i < C4_COUNTOF(detail::base64_char_to_sextet_); ++i) + { + char c2s = base64_char_to_sextet_[i]; + if(c2s == char(-1)) + continue; + char s2c = base64_sextet_to_char_[(int)c2s]; + C4_CHECK((size_t)s2c == i); + } +} +#endif +} // namespace detail + + +bool base64_valid(csubstr encoded) +{ + if(encoded.len % 4) return false; + for(const char c : encoded) + { + if(c < 0/* || c >= 128*/) + return false; + if(c == '=') + continue; + if(detail::base64_char_to_sextet_[c] == char(-1)) + return false; + } + return true; +} + + +size_t base64_encode(substr buf, cblob data) +{ + #define c4append_(c) { if(pos < buf.len) { buf.str[pos] = (c); } ++pos; } + #define c4append_idx_(char_idx) \ + {\ + C4_XASSERT((char_idx) < sizeof(detail::base64_sextet_to_char_));\ + c4append_(detail::base64_sextet_to_char_[(char_idx)]);\ + } + + size_t rem, pos = 0; + constexpr const uint32_t sextet_mask = uint32_t(1 << 6) - 1; + const unsigned char *C4_RESTRICT d = (unsigned char *) data.buf; // cast to unsigned to avoid wrapping high-bits + for(rem = data.len; rem >= 3; rem -= 3, d += 3) + { + const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8) | (uint32_t(d[2]))); + c4append_idx_((val >> 18) & sextet_mask); + c4append_idx_((val >> 12) & sextet_mask); + c4append_idx_((val >> 6) & sextet_mask); + c4append_idx_((val ) & sextet_mask); + } + C4_ASSERT(rem < 3); + if(rem == 2) + { + const uint32_t val = ((uint32_t(d[0]) << 16) | (uint32_t(d[1]) << 8)); + c4append_idx_((val >> 18) & sextet_mask); + c4append_idx_((val >> 12) & sextet_mask); + c4append_idx_((val >> 6) & sextet_mask); + c4append_('='); + } + else if(rem == 1) + { + const uint32_t val = ((uint32_t(d[0]) << 16)); + c4append_idx_((val >> 18) & sextet_mask); + c4append_idx_((val >> 12) & sextet_mask); + c4append_('='); + c4append_('='); + } + return pos; + + #undef c4append_ + #undef c4append_idx_ +} + + +size_t base64_decode(csubstr encoded, blob data) +{ + #define c4append_(c) { if(wpos < data.len) { data.buf[wpos] = static_cast(c); } ++wpos; } + #define c4appendval_(c, shift)\ + {\ + C4_XASSERT(c >= 0);\ + C4_XASSERT(size_t(c) < sizeof(detail::base64_char_to_sextet_));\ + val |= static_cast(detail::base64_char_to_sextet_[(c)]) << ((shift) * 6);\ + } + + C4_ASSERT(base64_valid(encoded)); + 
C4_CHECK(encoded.len % 4 == 0); + size_t wpos = 0; // the write position + const char *C4_RESTRICT d = encoded.str; + constexpr const uint32_t full_byte = 0xff; + // process every quartet of input 6 bits --> triplet of output bytes + for(size_t rpos = 0; rpos < encoded.len; rpos += 4, d += 4) + { + if(d[2] == '=' || d[3] == '=') // skip the last quartet if it is padded + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + break; + } + uint32_t val = 0; + c4appendval_(d[3], 0); + c4appendval_(d[2], 1); + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + c4append_((val >> (1 * 8)) & full_byte); + c4append_((val ) & full_byte); + } + // deal with the last quartet when it is padded + if(d == encoded.str + encoded.len) + return wpos; + if(d[2] == '=') // 2 padding chars + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + C4_ASSERT(d[3] == '='); + uint32_t val = 0; + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + } + else if(d[3] == '=') // 1 padding char + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + uint32_t val = 0; + c4appendval_(d[2], 1); + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + c4append_((val >> (1 * 8)) & full_byte); + } + return wpos; + #undef c4append_ + #undef c4appendval_ +} + +} // namespace c4 + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/base64.cpp) + +#define C4_WINDOWS_POP_HPP_ + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows_push.hpp +// https://github.com/biojppm/c4core/src/c4/windows_push.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_PUSH_HPP_ +#define _C4_WINDOWS_PUSH_HPP_ + +/** @file windows_push.hpp sets up macros to include windows header files + * without pulling in all of + * + * @see #include windows_pop.hpp to undefine these macros + * + * @see https://aras-p.info/blog/2018/01/12/Minimizing-windows.h/ */ + + +#if defined(_WIN64) || defined(_WIN32) + +#if defined(_M_AMD64) +# ifndef _AMD64_ +# define _c4_AMD64_ +# define _AMD64_ +# endif +#elif defined(_M_IX86) +# ifndef _X86_ +# define _c4_X86_ +# define _X86_ +# endif +#elif defined(_M_ARM64) +# ifndef _ARM64_ +# define _c4_ARM64_ +# define _ARM64_ +# endif +#elif defined(_M_ARM) +# ifndef _ARM_ +# define _c4_ARM_ +# define _ARM_ +# endif +#endif + +#ifndef NOMINMAX +# define _c4_NOMINMAX +# define NOMINMAX +#endif + +#ifndef NOGDI +# define _c4_NOGDI +# define NOGDI +#endif + +#ifndef VC_EXTRALEAN +# define _c4_VC_EXTRALEAN +# define VC_EXTRALEAN +#endif + +#ifndef WIN32_LEAN_AND_MEAN +# define _c4_WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +/* If defined, the following flags inhibit definition + * of the indicated items. 
+ * + * NOGDICAPMASKS - CC_*, LC_*, PC_*, CP_*, TC_*, RC_ + * NOVIRTUALKEYCODES - VK_* + * NOWINMESSAGES - WM_*, EM_*, LB_*, CB_* + * NOWINSTYLES - WS_*, CS_*, ES_*, LBS_*, SBS_*, CBS_* + * NOSYSMETRICS - SM_* + * NOMENUS - MF_* + * NOICONS - IDI_* + * NOKEYSTATES - MK_* + * NOSYSCOMMANDS - SC_* + * NORASTEROPS - Binary and Tertiary raster ops + * NOSHOWWINDOW - SW_* + * OEMRESOURCE - OEM Resource values + * NOATOM - Atom Manager routines + * NOCLIPBOARD - Clipboard routines + * NOCOLOR - Screen colors + * NOCTLMGR - Control and Dialog routines + * NODRAWTEXT - DrawText() and DT_* + * NOGDI - All GDI defines and routines + * NOKERNEL - All KERNEL defines and routines + * NOUSER - All USER defines and routines + * NONLS - All NLS defines and routines + * NOMB - MB_* and MessageBox() + * NOMEMMGR - GMEM_*, LMEM_*, GHND, LHND, associated routines + * NOMETAFILE - typedef METAFILEPICT + * NOMINMAX - Macros min(a,b) and max(a,b) + * NOMSG - typedef MSG and associated routines + * NOOPENFILE - OpenFile(), OemToAnsi, AnsiToOem, and OF_* + * NOSCROLL - SB_* and scrolling routines + * NOSERVICE - All Service Controller routines, SERVICE_ equates, etc. + * NOSOUND - Sound driver routines + * NOTEXTMETRIC - typedef TEXTMETRIC and associated routines + * NOWH - SetWindowsHook and WH_* + * NOWINOFFSETS - GWL_*, GCL_*, associated routines + * NOCOMM - COMM driver routines + * NOKANJI - Kanji support stuff. + * NOHELP - Help engine interface. + * NOPROFILER - Profiler interface. + * NODEFERWINDOWPOS - DeferWindowPos routines + * NOMCX - Modem Configuration Extensions + */ + +#endif /* defined(_WIN64) || defined(_WIN32) */ + +#endif /* _C4_WINDOWS_PUSH_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows_push.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows.hpp +// https://github.com/biojppm/c4core/src/c4/windows.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_HPP_ +#define _C4_WINDOWS_HPP_ + +#if defined(_WIN64) || defined(_WIN32) +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows_push.hpp +//#include "c4/windows_push.hpp" +#if !defined(C4_WINDOWS_PUSH_HPP_) && !defined(_C4_WINDOWS_PUSH_HPP_) +#error "amalgamate: file c4/windows_push.hpp must have been included at this point" +#endif /* C4_WINDOWS_PUSH_HPP_ */ + +#include +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows_pop.hpp +//#include "c4/windows_pop.hpp" +#if !defined(C4_WINDOWS_POP_HPP_) && !defined(_C4_WINDOWS_POP_HPP_) +#error "amalgamate: file c4/windows_pop.hpp must have been included at this point" +#endif /* C4_WINDOWS_POP_HPP_ */ + +#endif + +#endif /* _C4_WINDOWS_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows_pop.hpp +// https://github.com/biojppm/c4core/src/c4/windows_pop.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_POP_HPP_ +#define _C4_WINDOWS_POP_HPP_ + +#if defined(_WIN64) || 
defined(_WIN32) + +#ifdef _c4_AMD64_ +# undef _c4_AMD64_ +# undef _AMD64_ +#endif +#ifdef _c4_X86_ +# undef _c4_X86_ +# undef _X86_ +#endif +#ifdef _c4_ARM_ +# undef _c4_ARM_ +# undef _ARM_ +#endif + +#ifdef _c4_NOMINMAX +# undef _c4_NOMINMAX +# undef NOMINMAX +#endif + +#ifdef NOGDI +# undef _c4_NOGDI +# undef NOGDI +#endif + +#ifdef VC_EXTRALEAN +# undef _c4_VC_EXTRALEAN +# undef VC_EXTRALEAN +#endif + +#ifdef WIN32_LEAN_AND_MEAN +# undef _c4_WIN32_LEAN_AND_MEAN +# undef WIN32_LEAN_AND_MEAN +#endif + +#endif /* defined(_WIN64) || defined(_WIN32) */ + +#endif /* _C4_WINDOWS_POP_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows_pop.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/error.cpp +// https://github.com/biojppm/c4core/src/c4/error.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +//included above: +//#include +//included above: +//#include +//included above: +//#include + +#define C4_LOGF_ERR(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) +#define C4_LOGF_WARN(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) +#define C4_LOGP(msg, ...) printf(msg) + +#if defined(C4_XBOX) || (defined(C4_WIN) && defined(C4_MSVC)) +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows.hpp +//# include "c4/windows.hpp" +#if !defined(C4_WINDOWS_HPP_) && !defined(_C4_WINDOWS_HPP_) +#error "amalgamate: file c4/windows.hpp must have been included at this point" +#endif /* C4_WINDOWS_HPP_ */ + +#elif defined(C4_PS4) +# include +#elif defined(C4_UNIX) || defined(C4_LINUX) +# include +//included above: +//# include +# include +#elif defined(C4_MACOS) || defined(C4_IOS) +//included above: +//# include +# include +# include +# include +#endif +// the amalgamation tool is dumb and was omitting this include under MACOS. +// So do it only once: +#if defined(C4_UNIX) || defined(C4_LINUX) || defined(C4_MACOS) || defined(C4_IOS) +# include +#endif + +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) +# include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + + +//----------------------------------------------------------------------------- +namespace c4 { + +static error_flags s_error_flags = ON_ERROR_DEFAULTS; +static error_callback_type s_error_callback = nullptr; + +//----------------------------------------------------------------------------- + +error_flags get_error_flags() +{ + return s_error_flags; +} +void set_error_flags(error_flags flags) +{ + s_error_flags = flags; +} + +error_callback_type get_error_callback() +{ + return s_error_callback; +} +/** Set the function which is called when an error occurs. 
*/ +void set_error_callback(error_callback_type cb) +{ + s_error_callback = cb; +} + +//----------------------------------------------------------------------------- + +void handle_error(srcloc where, const char *fmt, ...) +{ + char buf[1024]; + size_t msglen = 0; + if(s_error_flags & (ON_ERROR_LOG|ON_ERROR_CALLBACK)) + { + va_list args; + va_start(args, fmt); + int ilen = vsnprintf(buf, sizeof(buf), fmt, args); // ss.vprintf(fmt, args); + va_end(args); + msglen = ilen >= 0 && ilen < (int)sizeof(buf) ? static_cast(ilen) : sizeof(buf)-1; + } + + if(s_error_flags & ON_ERROR_LOG) + { + C4_LOGF_ERR("\n"); +#if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_ERR("%s:%d: ERROR: %s\n", where.file, where.line, buf); + C4_LOGF_ERR("%s:%d: ERROR here: %s\n", where.file, where.line, where.func); +#elif defined(C4_ERROR_SHOWS_FILELINE) + C4_LOGF_ERR("%s:%d: ERROR: %s\n", where.file, where.line, buf); +#elif ! defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_ERR("ERROR: %s\n", buf); +#endif + } + + if(s_error_flags & ON_ERROR_CALLBACK) + { + if(s_error_callback) + { + s_error_callback(buf, msglen/*ss.c_strp(), ss.tellp()*/); + } + } + + if(s_error_flags & ON_ERROR_ABORT) + { + abort(); + } + + if(s_error_flags & ON_ERROR_THROW) + { +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) + throw Exception(buf); +#else + abort(); +#endif + } +} + +//----------------------------------------------------------------------------- + +void handle_warning(srcloc where, const char *fmt, ...) +{ + va_list args; + char buf[1024]; //sstream ss; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + C4_LOGF_WARN("\n"); +#if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: here: %s\n", where.file, where.line, where.func); +#elif defined(C4_ERROR_SHOWS_FILELINE) + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); +#elif ! defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_WARN("WARNING: %s\n", buf/*ss.c_strp()*/); +#endif + //c4::log.flush(); +} + +//----------------------------------------------------------------------------- +bool is_debugger_attached() +{ +#if defined(C4_UNIX) || defined(C4_LINUX) + static bool first_call = true; + static bool first_call_result = false; + if(first_call) + { + first_call = false; + //! @see http://stackoverflow.com/questions/3596781/how-to-detect-if-the-current-process-is-being-run-by-gdb + //! (this answer: http://stackoverflow.com/a/24969863/3968589 ) + char buf[1024] = ""; + + int status_fd = open("/proc/self/status", O_RDONLY); + if (status_fd == -1) + { + return 0; + } + + ssize_t num_read = ::read(status_fd, buf, sizeof(buf)); + + if (num_read > 0) + { + static const char TracerPid[] = "TracerPid:"; + char *tracer_pid; + + if(num_read < 1024) + { + buf[num_read] = 0; + } + tracer_pid = strstr(buf, TracerPid); + if (tracer_pid) + { + first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1); + } + } + } + return first_call_result; +#elif defined(C4_PS4) + return (sceDbgIsDebuggerAttached() != 0); +#elif defined(C4_XBOX) || (defined(C4_WIN) && defined(C4_MSVC)) + return IsDebuggerPresent() != 0; +#elif defined(C4_MACOS) || defined(C4_IOS) + // https://stackoverflow.com/questions/2200277/detecting-debugger-on-mac-os-x + // Returns true if the current process is being debugged (either + // running under the debugger or has a debugger attached post facto). 
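+ // The query below asks sysctl() for the kinfo_proc record of the current pid
+ // and then tests its P_TRACED flag, which the kernel sets on traced processes.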
+ int junk; + int mib[4]; + struct kinfo_proc info; + size_t size; + + // Initialize the flags so that, if sysctl fails for some bizarre + // reason, we get a predictable result. + + info.kp_proc.p_flag = 0; + + // Initialize mib, which tells sysctl the info we want, in this case + // we're looking for information about a specific process ID. + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + mib[3] = getpid(); + + // Call sysctl. + + size = sizeof(info); + junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0); + assert(junk == 0); + + // We're being debugged if the P_TRACED flag is set. + return ((info.kp_proc.p_flag & P_TRACED) != 0); +#else + return false; +#endif +} // is_debugger_attached() + +} // namespace c4 + + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/error.cpp) + +#endif /* _C4CORE_SINGLE_HEADER_AMALGAMATED_HPP_ */ + + + +// (end https://github.com/biojppm/rapidyaml/src/c4/c4core_all.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/export.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_EXPORT_HPP_ +#define C4_YML_EXPORT_HPP_ + +#ifdef _WIN32 + #ifdef RYML_SHARED + #ifdef RYML_EXPORTS + #define RYML_EXPORT __declspec(dllexport) + #else + #define RYML_EXPORT __declspec(dllimport) + #endif + #else + #define RYML_EXPORT + #endif +#else + #define RYML_EXPORT +#endif + +#endif /* C4_YML_EXPORT_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/common.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_COMMON_HPP_ +#define _C4_YML_COMMON_HPP_ + +//included above: +//#include +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/export.hpp +//#include +#if !defined(C4_YML_EXPORT_HPP_) && !defined(_C4_YML_EXPORT_HPP_) +#error "amalgamate: file c4/yml/export.hpp must have been included at this point" +#endif /* C4_YML_EXPORT_HPP_ */ + + + +#ifndef RYML_USE_ASSERT +# define RYML_USE_ASSERT C4_USE_ASSERT +#endif + + +#if RYML_USE_ASSERT +# define RYML_ASSERT(cond) RYML_CHECK(cond) +# define RYML_ASSERT_MSG(cond, msg) RYML_CHECK_MSG(cond, msg) +#else +# define RYML_ASSERT(cond) +# define RYML_ASSERT_MSG(cond, msg) +#endif + + +#if defined(NDEBUG) || defined(C4_NO_DEBUG_BREAK) +# define RYML_DEBUG_BREAK() +#else +# define RYML_DEBUG_BREAK() \ + { \ + if(c4::get_error_flags() & c4::ON_ERROR_DEBUGBREAK) \ + { \ + C4_DEBUG_BREAK(); \ + 
} \ + } +#endif + + +#define RYML_CHECK(cond) \ + do { \ + if(!(cond)) \ + { \ + RYML_DEBUG_BREAK() \ + c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + } \ + } while(0) + +#define RYML_CHECK_MSG(cond, msg) \ + do \ + { \ + if(!(cond)) \ + { \ + RYML_DEBUG_BREAK() \ + c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \ + } \ + } while(0) + + +#if C4_CPP >= 14 +# define RYML_DEPRECATED(msg) [[deprecated(msg)]] +#else +# if defined(_MSC_VER) +# define RYML_DEPRECATED(msg) __declspec(deprecated(msg)) +# else // defined(__GNUC__) || defined(__clang__) +# define RYML_DEPRECATED(msg) __attribute__((deprecated(msg))) +# endif +#endif + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace c4 { +namespace yml { + +enum : size_t { + /** a null position */ + npos = size_t(-1), + /** an index to none */ + NONE = size_t(-1) +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +//! holds a position into a source buffer +struct RYML_EXPORT LineCol +{ + //! number of bytes from the beginning of the source buffer + size_t offset; + //! line + size_t line; + //! column + size_t col; + + LineCol() : offset(), line(), col() {} + //! construct from line and column + LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {} + //! construct from offset, line and column + LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {} +}; + + +//! a source file position +struct RYML_EXPORT Location : public LineCol +{ + csubstr name; + + operator bool () const { return !name.empty() || line != 0 || offset != 0; } + + Location() : LineCol(), name() {} + Location( size_t l, size_t c) : LineCol{ l, c}, name( ) {} + Location( csubstr n, size_t l, size_t c) : LineCol{ l, c}, name(n) {} + Location( csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {} + Location(const char *n, size_t l, size_t c) : LineCol{ l, c}, name(to_csubstr(n)) {} + Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {} +}; + + +//----------------------------------------------------------------------------- + +/** the type of the function used to report errors. This function must + * interrupt execution, either by raising an exception or calling + * std::abort(). + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data); +/** the type of the function used to allocate memory */ +using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data); +/** the type of the function used to free memory */ +using pfn_free = void (*)(void* mem, size_t size, void *user_data); + +/** trigger an error: call the current error callback. 
*/ +RYML_EXPORT void error(const char *msg, size_t msg_len, Location loc); +/** @overload error */ +inline void error(const char *msg, size_t msg_len) +{ + error(msg, msg_len, Location{}); +} +/** @overload error */ +template +inline void error(const char (&msg)[N], Location loc) +{ + error(msg, N-1, loc); +} +/** @overload error */ +template +inline void error(const char (&msg)[N]) +{ + error(msg, N-1, Location{}); +} + +//----------------------------------------------------------------------------- + +/** a c-style callbacks class + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +struct RYML_EXPORT Callbacks +{ + void * m_user_data; + pfn_allocate m_allocate; + pfn_free m_free; + pfn_error m_error; + + Callbacks(); + Callbacks(void *user_data, pfn_allocate alloc, pfn_free free, pfn_error error_); + + bool operator!= (Callbacks const& that) const { return !operator==(that); } + bool operator== (Callbacks const& that) const + { + return (m_user_data == that.m_user_data && + m_allocate == that.m_allocate && + m_free == that.m_free && + m_error == that.m_error); + } +}; + +/** set the global callbacks. + * + * @warning the error callback must never return: it must either abort + * or throw an exception. Otherwise, the parser will enter into an + * infinite loop, or the program may crash. */ +RYML_EXPORT void set_callbacks(Callbacks const& c); +/// get the global callbacks +RYML_EXPORT Callbacks const& get_callbacks(); +/// set the global callbacks back to their defaults +RYML_EXPORT void reset_callbacks(); + +/// @cond dev +#define _RYML_CB_ERR(cb, msg_literal) \ +do \ +{ \ + const char msg[] = msg_literal; \ + RYML_DEBUG_BREAK() \ + (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ +} while(0) +#define _RYML_CB_CHECK(cb, cond) \ + do \ + { \ + if(!(cond)) \ + { \ + const char msg[] = "check failed: " #cond; \ + RYML_DEBUG_BREAK() \ + (cb).m_error(msg, sizeof(msg), c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \ + } \ + } while(0) +#ifdef RYML_USE_ASSERT +#define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond)) +#else +#define _RYML_CB_ASSERT(cb, cond) do {} while(0) +#endif +#define _RYML_CB_ALLOC_HINT(cb, T, num, hint) (T*) (cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data) +#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr) +#define _RYML_CB_FREE(cb, buf, T, num) \ + do { \ + (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data); \ + (buf) = nullptr; \ + } while(0) + + + +namespace detail { +template +struct _charconstant_t + : public std::conditional::value, + std::integral_constant, + std::integral_constant>::type +{}; +#define _RYML_CHCONST(signedval, unsignedval) ::c4::yml::detail::_charconstant_t::value +} // namespace detail + + +namespace detail { +struct _SubstrWriter +{ + substr buf; + size_t pos; + _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {} + void append(csubstr s) + { + C4_ASSERT(!s.overlaps(buf)); + if(pos + s.len <= buf.len) + memcpy(buf.str + pos, s.str, s.len); + pos += s.len; + } + void append(char c) + { + if(pos < buf.len) + buf.str[pos] = c; + ++pos; + } + void append_n(char c, size_t numtimes) + { + if(pos + numtimes < buf.len) + memset(buf.str + pos, c, numtimes); + pos += numtimes; + } + size_t slack() const { return pos <= buf.len ? 
buf.len - pos : 0; } + size_t excess() const { return pos > buf.len ? pos - buf.len : 0; } + //! get the part written so far + csubstr curr() const { return pos <= buf.len ? buf.first(pos) : buf; } + //! get the part that is still free to write to (the remainder) + substr rem() { return pos < buf.len ? buf.sub(pos) : buf.last(0); } + + size_t advance(size_t more) { pos += more; return pos; } +}; +} // namespace detail + +/// @endcond + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_COMMON_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_TREE_HPP_ +#define _C4_YML_TREE_HPP_ + + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/types.hpp +//#include "c4/types.hpp" +#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) +#error "amalgamate: file c4/types.hpp must have been included at this point" +#endif /* C4_TYPES_HPP_ */ + +#ifndef _C4_YML_COMMON_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp +//#include +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + +//included above: +//#include +//included above: +//#include + + +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct +C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC("-Wtype-limits") + + +namespace c4 { +namespace yml { + +struct NodeScalar; +struct NodeInit; +struct NodeData; +class NodeRef; +class ConstNodeRef; +class Tree; + + +/** encode a floating point value to a string. */ +template +size_t to_chars_float(substr buf, T val) +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); + static_assert(std::is_floating_point::value, "must be floating point"); + if(C4_UNLIKELY(std::isnan(val))) + return to_chars(buf, csubstr(".nan")); + else if(C4_UNLIKELY(val == std::numeric_limits::infinity())) + return to_chars(buf, csubstr(".inf")); + else if(C4_UNLIKELY(val == -std::numeric_limits::infinity())) + return to_chars(buf, csubstr("-.inf")); + return to_chars(buf, val); + C4_SUPPRESS_WARNING_GCC_CLANG_POP +} + + +/** decode a floating point from string. 
Accepts special values: .nan, + * .inf, -.inf */ +template +bool from_chars_float(csubstr buf, T *C4_RESTRICT val) +{ + static_assert(std::is_floating_point::value, "must be floating point"); + if(C4_LIKELY(from_chars(buf, val))) + { + return true; + } + else if(C4_UNLIKELY(buf == ".nan" || buf == ".NaN" || buf == ".NAN")) + { + *val = std::numeric_limits::quiet_NaN(); + return true; + } + else if(C4_UNLIKELY(buf == ".inf" || buf == ".Inf" || buf == ".INF")) + { + *val = std::numeric_limits::infinity(); + return true; + } + else if(C4_UNLIKELY(buf == "-.inf" || buf == "-.Inf" || buf == "-.INF")) + { + *val = -std::numeric_limits::infinity(); + return true; + } + else + { + return false; + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** the integral type necessary to cover all the bits marking node tags */ +using tag_bits = uint16_t; + +/** a bit mask for marking tags for types */ +typedef enum : tag_bits { + // container types + TAG_NONE = 0, + TAG_MAP = 1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */ + TAG_OMAP = 2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */ + TAG_PAIRS = 3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */ + TAG_SET = 4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */ + TAG_SEQ = 5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */ + // scalar types + TAG_BINARY = 6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */ + TAG_BOOL = 7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */ + TAG_FLOAT = 8, /**< !!float Floating-point approximation to real numbers. https://yaml.org/type/float.html */ + TAG_INT = 9, /**< !!float Mathematical integers. https://yaml.org/type/int.html */ + TAG_MERGE = 10, /**< !!merge Specify one or more mapping to be merged with the current one. https://yaml.org/type/merge.html */ + TAG_NULL = 11, /**< !!null Devoid of value. https://yaml.org/type/null.html */ + TAG_STR = 12, /**< !!str A sequence of zero or more Unicode characters. https://yaml.org/type/str.html */ + TAG_TIMESTAMP = 13, /**< !!timestamp A point in time https://yaml.org/type/timestamp.html */ + TAG_VALUE = 14, /**< !!value Specify the default value of a mapping https://yaml.org/type/value.html */ + TAG_YAML = 15, /**< !!yaml Specify the default value of a mapping https://yaml.org/type/yaml.html */ +} YamlTag_e; + +YamlTag_e to_tag(csubstr tag); +csubstr from_tag(YamlTag_e tag); +csubstr from_tag_long(YamlTag_e tag); +csubstr normalize_tag(csubstr tag); +csubstr normalize_tag_long(csubstr tag); + +struct TagDirective +{ + /** Eg `!e!` in `%TAG !e! tag:example.com,2000:app/` */ + csubstr handle; + /** Eg `tag:example.com,2000:app/` in `%TAG !e! 
tag:example.com,2000:app/` */ + csubstr prefix; + /** The next node to which this tag directive applies */ + size_t next_node_id; +}; + +#ifndef RYML_MAX_TAG_DIRECTIVES +/** the maximum number of tag directives in a Tree */ +#define RYML_MAX_TAG_DIRECTIVES 4 +#endif + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + + +/** the integral type necessary to cover all the bits marking node types */ +using type_bits = uint64_t; + + +/** a bit mask for marking node types */ +typedef enum : type_bits { + // a convenience define, undefined below + #define c4bit(v) (type_bits(1) << v) + NOTYPE = 0, ///< no node type is set + VAL = c4bit(0), ///< a leaf node, has a (possibly empty) value + KEY = c4bit(1), ///< is member of a map, must have non-empty key + MAP = c4bit(2), ///< a map: a parent of keyvals + SEQ = c4bit(3), ///< a seq: a parent of vals + DOC = c4bit(4), ///< a document + STREAM = c4bit(5)|SEQ, ///< a stream: a seq of docs + KEYREF = c4bit(6), ///< a *reference: the key references an &anchor + VALREF = c4bit(7), ///< a *reference: the val references an &anchor + KEYANCH = c4bit(8), ///< the key has an &anchor + VALANCH = c4bit(9), ///< the val has an &anchor + KEYTAG = c4bit(10), ///< the key has an explicit tag/type + VALTAG = c4bit(11), ///< the val has an explicit tag/type + _TYMASK = c4bit(12)-1, // all the bits up to here + VALQUO = c4bit(12), ///< the val is quoted by '', "", > or | + KEYQUO = c4bit(13), ///< the key is quoted by '', "", > or | + KEYVAL = KEY|VAL, + KEYSEQ = KEY|SEQ, + KEYMAP = KEY|MAP, + DOCMAP = DOC|MAP, + DOCSEQ = DOC|SEQ, + DOCVAL = DOC|VAL, + _KEYMASK = KEY | KEYQUO | KEYANCH | KEYREF | KEYTAG, + _VALMASK = VAL | VALQUO | VALANCH | VALREF | VALTAG, + // these flags are from a work in progress and should not be used yet + _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2], maps as '{key: val, key2: val2}') + _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2], maps as '{key: val,\nkey2: val2}') + _WIP_STYLE_BLOCK = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val') + _WIP_KEY_LITERAL = c4bit(17), ///< mark key scalar as multiline, block literal | + _WIP_VAL_LITERAL = c4bit(18), ///< mark val scalar as multiline, block literal | + _WIP_KEY_FOLDED = c4bit(19), ///< mark key scalar as multiline, block folded > + _WIP_VAL_FOLDED = c4bit(20), ///< mark val scalar as multiline, block folded > + _WIP_KEY_SQUO = c4bit(21), ///< mark key scalar as single quoted + _WIP_VAL_SQUO = c4bit(22), ///< mark val scalar as single quoted + _WIP_KEY_DQUO = c4bit(23), ///< mark key scalar as double quoted + _WIP_VAL_DQUO = c4bit(24), ///< mark val scalar as double quoted + _WIP_KEY_PLAIN = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline) + _WIP_VAL_PLAIN = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline) + _WIP_KEY_STYLE = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN, + _WIP_VAL_STYLE = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN, + _WIP_KEY_FT_NL = c4bit(27), ///< features: mark key scalar as having \n in its contents + _WIP_VAL_FT_NL = c4bit(28), ///< features: mark val scalar as having \n in its contents + _WIP_KEY_FT_SQ = 
c4bit(29), ///< features: mark key scalar as having single quotes in its contents + _WIP_VAL_FT_SQ = c4bit(30), ///< features: mark val scalar as having single quotes in its contents + _WIP_KEY_FT_DQ = c4bit(31), ///< features: mark key scalar as having double quotes in its contents + _WIP_VAL_FT_DQ = c4bit(32), ///< features: mark val scalar as having double quotes in its contents + #undef c4bit +} NodeType_e; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** wraps a NodeType_e element with some syntactic sugar and predicates */ +struct NodeType +{ +public: + + NodeType_e type; + +public: + + C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {} + C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {} + C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {} + + C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); } + static const char* type_str(NodeType_e t); + + C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; } + C4_ALWAYS_INLINE void set(type_bits t) { type = (NodeType_e)t; } + + C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); } + C4_ALWAYS_INLINE void add(type_bits t) { type = (NodeType_e)(type|t); } + + C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); } + C4_ALWAYS_INLINE void rem(type_bits t) { type = (NodeType_e)(type & ~t); } + + C4_ALWAYS_INLINE void clear() { type = NOTYPE; } + +public: + + C4_ALWAYS_INLINE operator NodeType_e & C4_RESTRICT () { return type; } + C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; } + + C4_ALWAYS_INLINE bool operator== (NodeType_e t) const { return type == t; } + C4_ALWAYS_INLINE bool operator!= (NodeType_e t) const { return type != t; } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + C4_ALWAYS_INLINE bool is_notype() const { return type == NOTYPE; } + C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; } + C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; } + C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; } + C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; } + C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; } + C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; } + C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; } + C4_ALWAYS_INLINE bool is_val() const { return (type & KEYVAL) == VAL; } + C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; } + C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); } + C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & VALTAG) && (type & (VAL|MAP|SEQ))); } + C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } + C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); } + C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } + C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; } + 
C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; } + C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; } + C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; } + C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; } + C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; } + C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); } + C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); } + C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); } + + // these predicates are a work in progress and subject to change. Don't use yet. + C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; } + C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; } + C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; } + C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; } + C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; } + C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; } + C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; } + C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; } + C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; } + C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; } + C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; } + C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a node scalar is a csubstr, which may be tagged and anchored. 
*/ +struct NodeScalar +{ + csubstr tag; + csubstr scalar; + csubstr anchor; + +public: + + /// initialize as an empty scalar + inline NodeScalar() noexcept : tag(), scalar(), anchor() {} + + /// initialize as an untagged scalar + template + inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} + inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} + + /// initialize as a tagged scalar + template + inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} + inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} + +public: + + ~NodeScalar() noexcept = default; + NodeScalar(NodeScalar &&) noexcept = default; + NodeScalar(NodeScalar const&) noexcept = default; + NodeScalar& operator= (NodeScalar &&) noexcept = default; + NodeScalar& operator= (NodeScalar const&) noexcept = default; + +public: + + bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } + + void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } + + void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept + { + csubstr trimmed = ref.begins_with('*') ? ref.sub(1) : ref; + anchor = trimmed; + if((!has_scalar) || !scalar.ends_with(trimmed)) + scalar = ref; + } +}; +C4_MUST_BE_TRIVIAL_COPY(NodeScalar); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** convenience class to initialize nodes */ +struct NodeInit +{ + + NodeType type; + NodeScalar key; + NodeScalar val; + +public: + + /// initialize as an empty node + NodeInit() : type(NOTYPE), key(), val() {} + /// initialize as a typed node + NodeInit(NodeType_e t) : type(t), key(), val() {} + /// initialize as a sequence member + NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } + /// initialize as a mapping member + NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type + NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type (eg SEQ or MAP) + NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } + +public: + + void clear() + { + type.clear(); + key.clear(); + val.clear(); + } + + void _add_flags(type_bits more_flags=0) + { + type = (type|more_flags); + if( ! key.tag.empty()) + type = (type|KEYTAG); + if( ! val.tag.empty()) + type = (type|VALTAG); + if( ! key.anchor.empty()) + type = (type|KEYANCH); + if( ! val.anchor.empty()) + type = (type|VALANCH); + } + + bool _check() const + { + // key cannot be empty + RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); + // key tag cannot be empty + RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); + // val may be empty even though VAL is set. 
But when VAL is not set, val must be empty + RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); + // val tag cannot be empty + RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); + return true; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** contains the data for each YAML node. */ +struct NodeData +{ + NodeType m_type; + + NodeScalar m_key; + NodeScalar m_val; + + size_t m_parent; + size_t m_first_child; + size_t m_last_child; + size_t m_next_sibling; + size_t m_prev_sibling; +}; +C4_MUST_BE_TRIVIAL_COPY(NodeData); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +class RYML_EXPORT Tree +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Tree() : Tree(get_callbacks()) {} + Tree(Callbacks const& cb); + Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} + Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); + + ~Tree(); + + Tree(Tree const& that) noexcept; + Tree(Tree && that) noexcept; + + Tree& operator= (Tree const& that) noexcept; + Tree& operator= (Tree && that) noexcept; + + /** @} */ + +public: + + /** @name memory and sizing */ + /** @{ */ + + void reserve(size_t node_capacity); + + /** clear the tree and zero every node + * @note does NOT clear the arena + * @see clear_arena() */ + void clear(); + inline void clear_arena() { m_arena_pos = 0; } + + inline bool empty() const { return m_size == 0; } + + inline size_t size() const { return m_size; } + inline size_t capacity() const { return m_cap; } + inline size_t slack() const { RYML_ASSERT(m_cap >= m_size); return m_cap - m_size; } + + Callbacks const& callbacks() const { return m_callbacks; } + void callbacks(Callbacks const& cb) { m_callbacks = cb; } + + /** @} */ + +public: + + /** @name node getters */ + /** @{ */ + + //! get the index of a node belonging to this tree. + //! @p n can be nullptr, in which case a + size_t id(NodeData const* n) const + { + if( ! n) + { + return NONE; + } + RYML_ASSERT(n >= m_buf && n < m_buf + m_cap); + return static_cast(n - m_buf); + } + + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned + inline NodeData *get(size_t i) + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned. + inline NodeData const *get(size_t i) const + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData * _p(size_t i) { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + + //! 
Get the id of the root node + size_t root_id() { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + //! Get the id of the root node + size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + + //! Get a NodeRef of a node by id + NodeRef ref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef ref(size_t id) const; + //! Get a NodeRef of a node by id + ConstNodeRef cref(size_t id); + //! Get a NodeRef of a node by id + ConstNodeRef cref(size_t id) const; + + //! Get the root as a NodeRef + NodeRef rootref(); + //! Get the root as a NodeRef + ConstNodeRef rootref() const; + //! Get the root as a NodeRef + ConstNodeRef crootref(); + //! Get the root as a NodeRef + ConstNodeRef crootref() const; + + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + NodeRef operator[] (csubstr key); + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + ConstNodeRef operator[] (csubstr key) const; + + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @i is NOT the node id, but the child's position + NodeRef operator[] (size_t i); + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @i is NOT the node id, but the child's position + ConstNodeRef operator[] (size_t i) const; + + //! get the i-th document of the stream + //! @note @i is NOT the node id, but the doc position within the stream + NodeRef docref(size_t i); + //! get the i-th document of the stream + //! @note @i is NOT the node id, but the doc position within the stream + ConstNodeRef docref(size_t i) const; + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + NodeType type(size_t node) const { return _p(node)->m_type; } + const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } + + csubstr const& key (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; } + csubstr const& key_tag (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; } + csubstr const& key_ref (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } + csubstr const& key_anchor(size_t node) const { RYML_ASSERT( ! is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } + NodeScalar const& keysc (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; } + + csubstr const& val (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; } + csubstr const& val_tag (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; } + csubstr const& val_ref (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } + csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! 
is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } + NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } + + /** @} */ + +public: + + /** @name node predicates */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } + C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } + C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } + C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } + C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } + C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } + C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } + C4_ALWAYS_INLINE bool is_val(size_t node) const { return _p(node)->m_type.is_val(); } + C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } + C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); } + C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } + C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); } + C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } + C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } + C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } + C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } + C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } + C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } + C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } + C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } + C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } + C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } + C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } + C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } + + C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); } + C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } + + /** true when key and val are empty, and has no children */ + C4_ALWAYS_INLINE bool empty(size_t node) const { return ! has_children(node) && _p(node)->m_key.empty() && (( ! 
(_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } + /** true when the node has an anchor named a */ + C4_ALWAYS_INLINE bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + C4_ALWAYS_INLINE bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_key_quoted() && _is_null(n->m_key.scalar); } + C4_ALWAYS_INLINE bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); NodeData const* C4_RESTRICT n = _p(node); return !n->m_type.is_val_quoted() && _is_null(n->m_val.scalar); } + static bool _is_null(csubstr s) noexcept + { + return s.str == nullptr || + s == "~" || + s == "null" || + s == "Null" || + s == "NULL"; + } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + + bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } + + /** true if @p node has a child with id @p ch */ + bool has_child(size_t node, size_t ch) const { return _p(ch)->m_parent == node; } + /** true if @p node has a child with key @p key */ + bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } + /** true if @p node has any children key */ + bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } + + /** true if @p node has a sibling with id @p sib */ + bool has_sibling(size_t node, size_t sib) const { return _p(node)->m_parent == _p(sib)->m_parent; } + /** true if one of the node's siblings has the given key */ + bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } + /** true if node is not a single child */ + bool has_other_siblings(size_t node) const + { + NodeData const *n = _p(node); + if(C4_LIKELY(n->m_parent != NONE)) + { + n = _p(n->m_parent); + return n->m_first_child != n->m_last_child; + } + return false; + } + + RYML_DEPRECATED("use has_other_siblings()") bool has_siblings(size_t /*node*/) const { return true; } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + size_t parent(size_t node) const { return _p(node)->m_parent; } + + size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } + size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } + + /** O(#num_children) */ + size_t num_children(size_t node) const; + size_t child_pos(size_t node, size_t ch) const; + size_t first_child(size_t node) const { return _p(node)->m_first_child; } + size_t last_child(size_t node) const { return _p(node)->m_last_child; } + size_t child(size_t node, size_t pos) const; + size_t find_child(size_t node, csubstr const& key) const; + + /** O(#num_siblings) */ + /** counts with this */ + size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } + /** does not count with this */ + size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; } + size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } + size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } + size_t last_sibling(size_t node) const { return is_root(node) ? 
node : _p(_p(node)->m_parent)->m_last_child; } + size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } + size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } + + size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. + + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); + void to_map(size_t node, csubstr key, type_bits more_flags=0); + void to_seq(size_t node, csubstr key, type_bits more_flags=0); + void to_val(size_t node, csubstr val, type_bits more_flags=0); + void to_map(size_t node, type_bits more_flags=0); + void to_seq(size_t node, type_bits more_flags=0); + void to_doc(size_t node, type_bits more_flags=0); + void to_stream(size_t node, type_bits more_flags=0); + + void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; } + void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; } + + void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } + void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + + void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } + void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } + void set_key_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } + void set_val_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + + void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } + void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } + void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } + void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } + void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } + + /** @} */ + +public: + + /** @name tree modifiers */ + /** @{ */ + + /** reorder the tree in memory so that all the nodes are stored + * in a linear sequence when visited in depth-first order. + * This will invalidate existing ids, since the node id is its + * position in the node array. */ + void reorder(); + + /** Resolve references (aliases <- anchors) in the tree. + * + * Dereferencing is opt-in; after parsing, Tree::resolve() + * has to be called explicitly for obtaining resolved references in the + * tree. This method will resolve all references and substitute the + * anchored values in place of the reference. 
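 *
 * A minimal usage sketch (an illustration only, assuming the
 * ryml::parse_in_arena() entry point declared elsewhere in this
 * amalgamated header):
 *
 * @code
 * ryml::Tree t = ryml::parse_in_arena("{base: &b {a: 1, b: 2}, derived: *b}");
 * t.resolve(); // replaces the '*b' alias with a copy of the anchored map
 * // after resolving, t["derived"]["a"].val() == "1"
 * @endcode
 *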
+ * + * This method first does a full traversal of the tree to gather all + * anchors and references in a separate collection, then it goes through + * that collection to locate the names, which it does by obeying the YAML + * standard diktat that "an alias node refers to the most recent node in + * the serialization having the specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear complexity + * (from the initial traversal). This potential cost is the reason for + * requiring an explicit call. + */ + void resolve(); + + /** @} */ + +public: + + /** @name tag directives */ + /** @{ */ + + void resolve_tags(); + + size_t num_tag_directives() const; + size_t add_tag_directive(TagDirective const& td); + void clear_tag_directives(); + + size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; + csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const + { + size_t needed = resolve_tag(output, tag, node_id); + return needed <= output.len ? output.first(needed) : output; + } + + using tag_directive_const_iterator = TagDirective const*; + tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } + tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + + struct TagDirectiveProxy + { + tag_directive_const_iterator b, e; + tag_directive_const_iterator begin() const { return b; } + tag_directive_const_iterator end() const { return e; } + }; + + TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } + + /** @} */ + +public: + + /** @name modifying hierarchy */ + /** @{ */ + + /** create and insert a new child of @p parent. insert after the (to-be) + * sibling @p after, which must be a child of @p parent. To insert as the + * first child, set after to NONE */ + C4_ALWAYS_INLINE size_t insert_child(size_t parent, size_t after) + { + RYML_ASSERT(parent != NONE); + RYML_ASSERT(is_container(parent) || is_root(parent)); + RYML_ASSERT(after == NONE || (_p(after)->m_parent == parent)); + size_t child = _claim(); + _set_hierarchy(child, parent, after); + return child; + } + /** create and insert a node as the first child of @p parent */ + C4_ALWAYS_INLINE size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } + /** create and insert a node as the last child of @p parent */ + C4_ALWAYS_INLINE size_t append_child(size_t parent) { return insert_child(parent, _p(parent)->m_last_child); } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + //! create and insert a new sibling of n. 
insert after "after" + C4_ALWAYS_INLINE size_t insert_sibling(size_t node, size_t after) + { + return insert_child(_p(node)->m_parent, after); + } + /** create and insert a node as the first node of @p parent */ + C4_ALWAYS_INLINE size_t prepend_sibling(size_t node) { return prepend_child(_p(node)->m_parent); } + C4_ALWAYS_INLINE size_t append_sibling(size_t node) { return append_child(_p(node)->m_parent); } + +public: + + /** remove an entire branch at once: ie remove the children and the node itself */ + inline void remove(size_t node) + { + remove_children(node); + _release(node); + } + + /** remove all the node's children, but keep the node itself */ + void remove_children(size_t node); + + /** change the @p type of the node to one of MAP, SEQ or VAL. @p + * type must have one and only one of MAP,SEQ,VAL; @p type may + * possibly have KEY, but if it does, then the @p node must also + * have KEY. Changing to the same type is a no-op. Otherwise, + * changing to a different type will initialize the node with an + * empty value of the desired type: changing to VAL will + * initialize with a null scalar (~), changing to MAP will + * initialize with an empty map ({}), and changing to SEQ will + * initialize with an empty seq ([]). */ + bool change_type(size_t node, NodeType type); + + bool change_type(size_t node, type_bits type) + { + return change_type(node, (NodeType)type); + } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +public: + + /** change the node's position in the parent */ + void move(size_t node, size_t after); + + /** change the node's parent and position */ + void move(size_t node, size_t new_parent, size_t after); + + /** change the node's parent and position to a different tree + * @return the index of the new node in the destination tree */ + size_t move(Tree * src, size_t node, size_t new_parent, size_t after); + + /** ensure the first node is a stream. Eg, change this tree + * + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * to + * + * STREAM + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * If the root is already a stream, this is a no-op. + */ + void set_root_as_stream(); + +public: + + /** recursively duplicate a node from this tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(size_t node, size_t new_parent, size_t after); + /** recursively duplicate a node from a different tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); + + /** recursively duplicate the node's children (but not the node) + * @return the index of the last duplicated child */ + size_t duplicate_children(size_t node, size_t parent, size_t after); + /** recursively duplicate the node's children (but not the node), where + * the node is from a different tree + * @return the index of the last duplicated child */ + size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); + + void duplicate_contents(size_t node, size_t where); + void duplicate_contents(Tree const* src, size_t node, size_t where); + + /** duplicate the node's children (but not the node) in a new parent, but + * omit repetitions where a duplicated node has the same key (in maps) or + * value (in seqs). 
If one of the duplicated children has the same key + * (in maps) or value (in seqs) as one of the parent's children, the one + * that is placed closest to the end will prevail. */ + size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); + size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); + +public: + + void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); + + /** @} */ + +public: + + /** @name internal string arena */ + /** @{ */ + + /** get the current size of the tree's internal arena */ + RYML_DEPRECATED("use arena_size() instead") size_t arena_pos() const { return m_arena_pos; } + /** get the current size of the tree's internal arena */ + inline size_t arena_size() const { return m_arena_pos; } + /** get the current capacity of the tree's internal arena */ + inline size_t arena_capacity() const { return m_arena.len; } + /** get the current slack of the tree's internal arena */ + inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } + + /** get the current arena */ + substr arena() const { return m_arena.first(m_arena_pos); } + + /** return true if the given substring is part of the tree's string arena */ + bool in_arena(csubstr s) const + { + return m_arena.is_super(s); + } + + /** serialize the given floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + template + typename std::enable_if::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars_float(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars_float(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** serialize the given non-floating-point variable to the tree's + * arena, growing it as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + template + typename std::enable_if::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** serialize the given csubstr to the tree's arena, growing the + * arena as needed to accomodate the serialization. + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). 
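 *
 * A minimal usage sketch of the arena calls (assuming ryml is the usual
 * namespace alias for c4::yml provided by this amalgamated header):
 *
 * @code
 * ryml::Tree t;
 * t.reserve_arena(64);                  // reserve up front to avoid later relocation
 * ryml::csubstr num = t.to_arena(3.14); // serialize a double into the arena
 * ryml::csubstr txt = t.to_arena(ryml::csubstr("some text")); // copy a string in
 * // num and txt point into t.arena() and stay valid until the arena grows again
 * @endcode
 *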
To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + csubstr to_arena(csubstr a) + { + if(a.len > 0) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + return _request_span(num); + } + else + { + if(a.str == nullptr) + { + return csubstr{}; + } + else if(m_arena.str == nullptr) + { + // Arena is empty and we want to store a non-null + // zero-length string. + // Even though the string has zero length, we need + // some "memory" to store a non-nullptr string + _grow_arena(1); + } + return _request_span(0); + } + } + C4_ALWAYS_INLINE csubstr to_arena(const char *s) + { + return to_arena(to_csubstr(s)); + } + C4_ALWAYS_INLINE csubstr to_arena(std::nullptr_t) + { + return csubstr{}; + } + + /** copy the given substr to the tree's arena, growing it by the + * required size + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena() + * + * @see alloc_arena() */ + substr copy_to_arena(csubstr s) + { + substr cp = alloc_arena(s.len); + RYML_ASSERT(cp.len == s.len); + RYML_ASSERT(!s.overlaps(cp)); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_PUSH + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 + C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior + #endif + if(s.len) + memcpy(cp.str, s.str, s.len); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_POP + #endif + return cp; + } + + /** grow the tree's string arena by the given size and return a substr + * of the added portion + * + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual + * nodes, and thus cost O(numnodes)+O(arenasize). To avoid this + * cost, ensure that the arena is reserved to an appropriate size + * using .reserve_arena(). + * + * @see reserve_arena() */ + substr alloc_arena(size_t sz) + { + if(sz > arena_slack()) + _grow_arena(sz - arena_slack()); + substr s = _request_span(sz); + return s; + } + + /** ensure the tree's internal string arena is at least the given capacity + * @note This operation has a potential complexity of O(numNodes)+O(arenasize). + * Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. */ + void reserve_arena(size_t arena_cap) + { + if(arena_cap > m_arena.len) + { + substr buf; + buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data); + buf.len = arena_cap; + if(m_arena.str) + { + RYML_ASSERT(m_arena.len >= 0); + _relocate(buf); // does a memcpy and changes nodes using the arena + m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data); + } + m_arena = buf; + } + } + + /** @} */ + +private: + + substr _grow_arena(size_t more) + { + size_t cap = m_arena.len + more; + cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap; + cap = cap < 64 ? 
64 : cap; + reserve_arena(cap); + return m_arena.sub(m_arena_pos); + } + + substr _request_span(size_t sz) + { + substr s; + s = m_arena.sub(m_arena_pos, sz); + m_arena_pos += sz; + return s; + } + + substr _relocated(csubstr s, substr next_arena) const + { + RYML_ASSERT(m_arena.is_super(s)); + RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s)); + auto pos = (s.str - m_arena.str); + substr r(next_arena.str + pos, s.len); + RYML_ASSERT(r.str - next_arena.str == pos); + RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r)); + return r; + } + +public: + + /** @name lookup */ + /** @{ */ + + struct lookup_result + { + size_t target; + size_t closest; + size_t path_pos; + csubstr path; + + inline operator bool() const { return target != NONE; } + + lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} + lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} + + /** get the part ot the input path that was resolved */ + csubstr resolved() const; + /** get the part ot the input path that was unresolved */ + csubstr unresolved() const; + }; + + /** for example foo.bar[0].baz */ + lookup_result lookup_path(csubstr path, size_t start=NONE) const; + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * default value. + * @see lookup_path() */ + size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * branch @p src_node (from the tree @p src). + * @see lookup_path() */ + size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); + + /** @} */ + +private: + + struct _lookup_path_token + { + csubstr value; + NodeType type; + _lookup_path_token() : value(), type() {} + _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {} + inline operator bool() const { return type != NOTYPE; } + bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } + }; + + size_t _lookup_path_or_create(csubstr path, size_t start); + + void _lookup_path (lookup_result *r) const; + void _lookup_path_modify(lookup_result *r); + + size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; + size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); + + void _advance(lookup_result *r, size_t more) const; + + _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const; + +private: + + void _clear(); + void _free(); + void _copy(Tree const& that); + void _move(Tree & that); + + void _relocate(substr next_arena); + +public: + + #if ! 
RYML_USE_ASSERT + C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} + #else + void _check_next_flags(size_t node, type_bits f) + { + auto n = _p(node); + type_bits o = n->m_type; // old + C4_UNUSED(o); + if(f & MAP) + { + RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val"); + RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first"); + } + else if(f & SEQ) + { + RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val"); + RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first"); + } + if(f & KEY) + { + RYML_ASSERT(!is_root(node)); + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid)); + } + if((f & VAL) && !is_root(node)) + { + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid) || is_seq(pid)); + } + } + #endif + + inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } + + inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } + + inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _rem_flags(size_t node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; } + + void _set_key(size_t node, csubstr key, type_bits more_flags=0) + { + _p(node)->m_key.scalar = key; + _add_flags(node, KEY|more_flags); + } + void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) + { + _p(node)->m_key = key; + _add_flags(node, KEY|more_flags); + } + + void _set_val(size_t node, csubstr val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT(!is_seq(node) && !is_map(node)); + _p(node)->m_val.scalar = val; + _add_flags(node, VAL|more_flags); + } + void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT( ! is_container(node)); + _p(node)->m_val = val; + _add_flags(node, VAL|more_flags); + } + + void _set(size_t node, NodeInit const& i) + { + RYML_ASSERT(i._check()); + NodeData *n = _p(node); + RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); + _add_flags(node, i.type); + if(n->m_key.scalar.empty()) + { + if( ! i.key.scalar.empty()) + { + _set_key(node, i.key.scalar); + } + } + n->m_key.tag = i.key.tag; + n->m_val = i.val; + } + + void _set_parent_as_container_if_needed(size_t in) + { + NodeData const* n = _p(in); + size_t ip = parent(in); + if(ip != NONE) + { + if( ! (is_seq(ip) || is_map(ip))) + { + if((in == first_child(ip)) && (in == last_child(ip))) + { + if( ! 
n->m_key.empty() || has_key(in)) + { + _add_flags(ip, MAP); + } + else + { + _add_flags(ip, SEQ); + } + } + } + } + } + + void _seq2map(size_t node) + { + RYML_ASSERT(is_seq(node)); + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + NodeData *C4_RESTRICT ch = _p(i); + if(ch->m_type.is_keyval()) + continue; + ch->m_type.add(KEY); + ch->m_key = ch->m_val; + } + auto *C4_RESTRICT n = _p(node); + n->m_type.rem(SEQ); + n->m_type.add(MAP); + } + + size_t _do_reorder(size_t *node, size_t count); + + void _swap(size_t n_, size_t m_); + void _swap_props(size_t n_, size_t m_); + void _swap_hierarchy(size_t n_, size_t m_); + void _copy_hierarchy(size_t dst_, size_t src_); + + inline void _copy_props(size_t dst_, size_t src_) + { + _copy_props(dst_, this, src_); + } + + inline void _copy_props_wo_key(size_t dst_, size_t src_) + { + _copy_props_wo_key(dst_, this, src_); + } + + void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = src.m_type; + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = (src.m_type & ~_KEYMASK) | (dst.m_type & _KEYMASK); + dst.m_val = src.m_val; + } + + inline void _clear_type(size_t node) + { + _p(node)->m_type = NOTYPE; + } + + inline void _clear(size_t node) + { + auto *C4_RESTRICT n = _p(node); + n->m_type = NOTYPE; + n->m_key.clear(); + n->m_val.clear(); + n->m_parent = NONE; + n->m_first_child = NONE; + n->m_last_child = NONE; + } + + inline void _clear_key(size_t node) + { + _p(node)->m_key.clear(); + _rem_flags(node, KEY); + } + + inline void _clear_val(size_t node) + { + _p(node)->m_val.clear(); + _rem_flags(node, VAL); + } + +private: + + void _clear_range(size_t first, size_t num); + + size_t _claim(); + void _claim_root(); + void _release(size_t node); + void _free_list_add(size_t node); + void _free_list_rem(size_t node); + + void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); + void _rem_hierarchy(size_t node); + +public: + + // members are exposed, but you should NOT access them directly + + NodeData * m_buf; + size_t m_cap; + + size_t m_size; + + size_t m_free_head; + size_t m_free_tail; + + substr m_arena; + size_t m_arena_pos; + + Callbacks m_callbacks; + + TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES]; + +}; + +} // namespace yml +} // namespace c4 + + +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +#endif /* _C4_YML_TREE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/node.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_NODE_HPP_ +#define _C4_YML_NODE_HPP_ + +/** @file node.hpp + * @see NodeRef */ + +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must 
have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/base64.hpp +//#include "c4/base64.hpp" +#if !defined(C4_BASE64_HPP_) && !defined(_C4_BASE64_HPP_) +#error "amalgamate: file c4/base64.hpp must have been included at this point" +#endif /* C4_BASE64_HPP_ */ + + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + +template struct Key { K & k; }; +template<> struct Key { fmt::const_base64_wrapper wrapper; }; +template<> struct Key { fmt::base64_wrapper wrapper; }; + +template C4_ALWAYS_INLINE Key key(K & k) { return Key{k}; } +C4_ALWAYS_INLINE Key key(fmt::const_base64_wrapper w) { return {w}; } +C4_ALWAYS_INLINE Key key(fmt::base64_wrapper w) { return {w}; } + +template void write(NodeRef *n, T const& v); + +template +typename std::enable_if< ! std::is_floating_point::value, bool>::type +read(NodeRef const& n, T *v); + +template +typename std::enable_if< std::is_floating_point::value, bool>::type +read(NodeRef const& n, T *v); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// forward decls +class NodeRef; +class ConstNodeRef; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace detail { + +template +struct child_iterator +{ + using value_type = NodeRefType; + using tree_type = typename NodeRefType::tree_type; + + tree_type * C4_RESTRICT m_tree; + size_t m_child_id; + + child_iterator(tree_type * t, size_t id) : m_tree(t), m_child_id(id) {} + + child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } + child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } + + NodeRefType operator* () const { return NodeRefType(m_tree, m_child_id); } + NodeRefType operator-> () const { return NodeRefType(m_tree, m_child_id); } + + bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } + bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } +}; + +template +struct children_view_ +{ + using n_iterator = child_iterator; + + n_iterator b, e; + + inline children_view_(n_iterator const& C4_RESTRICT b_, + n_iterator const& C4_RESTRICT e_) : b(b_), e(e_) {} + + inline n_iterator begin() const { return b; } + inline n_iterator end () const { return e; } +}; + +template +bool _visit(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! 
(node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + return true; + ++increment; + } + if(node.has_children()) + { + for(auto ch : node.children()) + { + if(_visit(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + return true; + } + } + } + return false; +} + +template +bool _visit_stacked(NodeRefType &node, Visitor fn, size_t indentation_level, bool skip_root=false) +{ + size_t increment = 0; + if( ! (node.is_root() && skip_root)) + { + if(fn(node, indentation_level)) + { + return true; + } + ++increment; + } + if(node.has_children()) + { + fn.push(node, indentation_level); + for(auto ch : node.children()) + { + if(_visit_stacked(ch, fn, indentation_level + increment, false)) // no need to forward skip_root as it won't be root + { + fn.pop(node, indentation_level); + return true; + } + } + fn.pop(node, indentation_level); + } + return false; +} + + +//----------------------------------------------------------------------------- + +/** a CRTP base for read-only node methods */ +template +struct RoNodeMethods +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wcast-align") + // helper CRTP macros, undefined at the end + #define tree_ ((ConstImpl const* C4_RESTRICT)this)->m_tree + #define id_ ((ConstImpl const* C4_RESTRICT)this)->m_id + #define tree__ ((Impl const* C4_RESTRICT)this)->m_tree + #define id__ ((Impl const* C4_RESTRICT)this)->m_id + // require valid + #define _C4RV() \ + RYML_ASSERT(tree_ != nullptr); \ + _RYML_CB_ASSERT(tree_->m_callbacks, id_ != NONE) + #define _C4_IF_MUTABLE(ty) typename std::enable_if::value, ty>::type + +public: + + /** @name node property getters */ + /** @{ */ + + /** returns the data or null when the id is NONE */ + C4_ALWAYS_INLINE C4_PURE NodeData const* get() const noexcept { RYML_ASSERT(tree_ != nullptr); return tree_->get(id_); } + /** returns the data or null when the id is NONE */ + template + C4_ALWAYS_INLINE C4_PURE auto get() noexcept -> _C4_IF_MUTABLE(NodeData*) { RYML_ASSERT(tree_ != nullptr); return tree__->get(id__); } + + C4_ALWAYS_INLINE C4_PURE NodeType type() const noexcept { _C4RV(); return tree_->type(id_); } + C4_ALWAYS_INLINE C4_PURE const char* type_str() const noexcept { return tree_->type_str(id_); } + + C4_ALWAYS_INLINE C4_PURE csubstr key() const noexcept { _C4RV(); return tree_->key(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_tag() const noexcept { _C4RV(); return tree_->key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_ref() const noexcept { _C4RV(); return tree_->key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr key_anchor() const noexcept { _C4RV(); return tree_->key_anchor(id_); } + + C4_ALWAYS_INLINE C4_PURE csubstr val() const noexcept { _C4RV(); return tree_->val(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_tag() const noexcept { _C4RV(); return tree_->val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_ref() const noexcept { _C4RV(); return tree_->val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE csubstr val_anchor() const noexcept { _C4RV(); return tree_->val_anchor(id_); } + + C4_ALWAYS_INLINE C4_PURE NodeScalar const& keysc() const noexcept { _C4RV(); return tree_->keysc(id_); } + C4_ALWAYS_INLINE C4_PURE NodeScalar const& valsc() const noexcept { _C4RV(); return tree_->valsc(id_); } + + C4_ALWAYS_INLINE C4_PURE bool key_is_null() const noexcept { _C4RV(); return tree_->key_is_null(id_); } + C4_ALWAYS_INLINE C4_PURE bool val_is_null() const noexcept { _C4RV(); return tree_->val_is_null(id_); } + + /** @} */ + +public: + + /** @name node 
property predicates */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool empty() const noexcept { _C4RV(); return tree_->empty(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_stream() const noexcept { _C4RV(); return tree_->is_stream(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_doc() const noexcept { _C4RV(); return tree_->is_doc(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_container() const noexcept { _C4RV(); return tree_->is_container(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_map() const noexcept { _C4RV(); return tree_->is_map(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_seq() const noexcept { _C4RV(); return tree_->is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val() const noexcept { _C4RV(); return tree_->has_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key() const noexcept { _C4RV(); return tree_->has_key(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val() const noexcept { _C4RV(); return tree_->is_val(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_keyval() const noexcept { _C4RV(); return tree_->is_keyval(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_tag() const noexcept { _C4RV(); return tree_->has_key_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_tag() const noexcept { _C4RV(); return tree_->has_val_tag(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_key_anchor() const noexcept { _C4RV(); return tree_->has_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_anchor() const noexcept { _C4RV(); return tree_->is_key_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_val_anchor() const noexcept { _C4RV(); return tree_->has_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_anchor() const noexcept { _C4RV(); return tree_->is_val_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_anchor() const noexcept { _C4RV(); return tree_->has_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor() const noexcept { _C4RV(); return tree_->is_anchor(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_ref() const noexcept { _C4RV(); return tree_->is_key_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_ref() const noexcept { _C4RV(); return tree_->is_val_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_ref() const noexcept { _C4RV(); return tree_->is_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_anchor_or_ref() const noexcept { _C4RV(); return tree_->is_anchor_or_ref(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_key_quoted() const noexcept { _C4RV(); return tree_->is_key_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_val_quoted() const noexcept { _C4RV(); return tree_->is_val_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool is_quoted() const noexcept { _C4RV(); return tree_->is_quoted(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_seq() const noexcept { _C4RV(); return tree_->parent_is_seq(id_); } + C4_ALWAYS_INLINE C4_PURE bool parent_is_map() const noexcept { _C4RV(); return tree_->parent_is_map(id_); } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool is_root() const noexcept { _C4RV(); return tree_->is_root(id_); } + C4_ALWAYS_INLINE C4_PURE bool has_parent() const noexcept { _C4RV(); return tree_->has_parent(id_); } + + C4_ALWAYS_INLINE C4_PURE bool has_child(ConstImpl const& ch) const noexcept { _C4RV(); return tree_->has_child(id_, ch.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_child(csubstr name) const noexcept { _C4RV(); return tree_->has_child(id_, name); } + C4_ALWAYS_INLINE C4_PURE bool has_children() const noexcept { _C4RV(); return tree_->has_children(id_); } + + C4_ALWAYS_INLINE C4_PURE bool has_sibling(ConstImpl const& n) 
const noexcept { _C4RV(); return tree_->has_sibling(id_, n.m_id); } + C4_ALWAYS_INLINE C4_PURE bool has_sibling(csubstr name) const noexcept { _C4RV(); return tree_->has_sibling(id_, name); } + /** counts with this */ + C4_ALWAYS_INLINE C4_PURE bool has_siblings() const noexcept { _C4RV(); return tree_->has_siblings(id_); } + /** does not count with this */ + C4_ALWAYS_INLINE C4_PURE bool has_other_siblings() const noexcept { _C4RV(); return tree_->has_other_siblings(id_); } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + + template + C4_ALWAYS_INLINE C4_PURE auto doc(size_t num) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->doc(num)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl doc(size_t num) const noexcept { _C4RV(); return {tree_, tree_->doc(num)}; } + + + template + C4_ALWAYS_INLINE C4_PURE auto parent() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->parent(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl parent() const noexcept { _C4RV(); return {tree_, tree_->parent(id_)}; } + + + /** O(#num_children) */ + C4_ALWAYS_INLINE C4_PURE size_t child_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(id_, n.m_id); } + C4_ALWAYS_INLINE C4_PURE size_t num_children() const noexcept { _C4RV(); return tree_->num_children(id_); } + + template + C4_ALWAYS_INLINE C4_PURE auto first_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl first_child() const noexcept { _C4RV(); return {tree_, tree_->first_child(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto last_child() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_child(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_child () const noexcept { _C4RV(); return {tree_, tree_->last_child (id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto child(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->child(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl child(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->child(id_, pos)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto find_child(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_child(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_child(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_child(id_, name)}; } + + + /** O(#num_siblings) */ + C4_ALWAYS_INLINE C4_PURE size_t num_siblings() const noexcept { _C4RV(); return tree_->num_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t num_other_siblings() const noexcept { _C4RV(); return tree_->num_other_siblings(id_); } + C4_ALWAYS_INLINE C4_PURE size_t sibling_pos(ConstImpl const& n) const noexcept { _C4RV(); return tree_->child_pos(tree_->parent(id_), n.m_id); } + + template + C4_ALWAYS_INLINE C4_PURE auto prev_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->prev_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl prev_sibling() const noexcept { _C4RV(); return {tree_, tree_->prev_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto next_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->next_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl next_sibling() const noexcept { _C4RV(); return {tree_, tree_->next_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto first_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->first_sibling(id__)}; } + 
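    /* A minimal navigation sketch using these hierarchy getters (assuming the
     * ryml::parse_in_arena() entry point declared elsewhere in this header):
     *
     *   ryml::Tree t = ryml::parse_in_arena("{a: 1, b: [2, 3]}");
     *   ryml::ConstNodeRef root = t.rootref();
     *   ryml::ConstNodeRef a = root.first_child();   // the 'a: 1' keyval
     *   ryml::ConstNodeRef b = a.next_sibling();     // the 'b: [2, 3]' seq
     *   ryml::ConstNodeRef two = b.child(0);         // the scalar '2'
     */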
C4_ALWAYS_INLINE C4_PURE ConstImpl first_sibling() const noexcept { _C4RV(); return {tree_, tree_->first_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto last_sibling() noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->last_sibling(id__)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl last_sibling () const noexcept { _C4RV(); return {tree_, tree_->last_sibling(id_)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto sibling(size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->sibling(id__, pos)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl sibling(size_t pos) const noexcept { _C4RV(); return {tree_, tree_->sibling(id_, pos)}; } + + template + C4_ALWAYS_INLINE C4_PURE auto find_sibling(csubstr name) noexcept -> _C4_IF_MUTABLE(Impl) { _C4RV(); return {tree__, tree__->find_sibling(id__, name)}; } + C4_ALWAYS_INLINE C4_PURE ConstImpl find_sibling(csubstr name) const noexcept { _C4RV(); return {tree_, tree_->find_sibling(id_, name)}; } + + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (csubstr k) const noexcept + { + _C4RV(); + size_t ch = tree_->find_child(id_, k); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + /** Find child by key. O(num_children). returns a seed node if no such child is found. */ + template + C4_ALWAYS_INLINE C4_PURE auto operator[] (csubstr k) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->find_child(id__, k); + return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, k); + } + + /** O(num_children) */ + C4_ALWAYS_INLINE C4_PURE ConstImpl operator[] (size_t pos) const noexcept + { + _C4RV(); + size_t ch = tree_->child(id_, pos); + _RYML_CB_ASSERT(tree_->m_callbacks, ch != NONE); + return {tree_, ch}; + } + + /** Find child by position. O(pos). returns a seed node if no such child is found. */ + template + C4_ALWAYS_INLINE C4_PURE auto operator[] (size_t pos) noexcept -> _C4_IF_MUTABLE(Impl) + { + _C4RV(); + size_t ch = tree__->child(id__, pos); + return ch != NONE ? Impl(tree__, ch) : NodeRef(tree__, id__, pos); + } + + /** @} */ + +public: + + /** deserialization */ + /** @{ */ + + template + ConstImpl const& operator>> (T &v) const + { + _C4RV(); + if( ! read((ConstImpl const&)*this, &v)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize value"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key to the given variable */ + template + ConstImpl const& operator>> (Key v) const + { + _C4RV(); + if( ! 
from_chars(key(), &v.k)) + _RYML_CB_ERR(tree_->m_callbacks, "could not deserialize key"); + return *((ConstImpl const*)this); + } + + /** deserialize the node's key as base64 */ + ConstImpl const& operator>> (Key w) const + { + deserialize_key(w.wrapper); + return *((ConstImpl const*)this); + } + + /** deserialize the node's val as base64 */ + ConstImpl const& operator>> (fmt::base64_wrapper w) const + { + deserialize_val(w); + return *((ConstImpl const*)this); + } + + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_key(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(key(), &v); + } + /** decode the base64-encoded key and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t deserialize_val(fmt::base64_wrapper v) const + { + _C4RV(); + return from_chars(val(), &v); + }; + + template + bool get_if(csubstr name, T *var) const + { + auto ch = find_child(name); + if(!ch.valid()) + return false; + ch >> *var; + return true; + } + + template + bool get_if(csubstr name, T *var, T const& fallback) const + { + auto ch = find_child(name); + if(ch.valid()) + { + ch >> *var; + return true; + } + else + { + *var = fallback; + return false; + } + } + + /** @} */ + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + /** @name iteration */ + /** @{ */ + + using iterator = detail::child_iterator; + using const_iterator = detail::child_iterator; + using children_view = detail::children_view_; + using const_children_view = detail::children_view_; + + template + C4_ALWAYS_INLINE C4_PURE auto begin() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, tree__->first_child(id__)); } + C4_ALWAYS_INLINE C4_PURE const_iterator begin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + C4_ALWAYS_INLINE C4_PURE const_iterator cbegin() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + template + C4_ALWAYS_INLINE C4_PURE auto end() noexcept -> _C4_IF_MUTABLE(iterator) { _C4RV(); return iterator(tree__, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator end() const noexcept { _C4RV(); return const_iterator(tree_, NONE); } + C4_ALWAYS_INLINE C4_PURE const_iterator cend() const noexcept { _C4RV(); return const_iterator(tree_, tree_->first_child(id_)); } + + /** get an iterable view over children */ + template + C4_ALWAYS_INLINE C4_PURE auto children() noexcept -> _C4_IF_MUTABLE(children_view) { _C4RV(); return children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view children() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + /** get an iterable view over children */ + C4_ALWAYS_INLINE C4_PURE const_children_view cchildren() const noexcept { _C4RV(); return const_children_view(begin(), end()); } + + /** get an iterable view over all siblings (including the calling node) */ + template + C4_ALWAYS_INLINE C4_PURE auto siblings() noexcept -> _C4_IF_MUTABLE(children_view) + { + _C4RV(); + NodeData const *nd = tree__->get(id__); + return (nd->m_parent != NONE) ? // does it have a parent? 
+ children_view(iterator(tree__, tree_->get(nd->m_parent)->m_first_child), iterator(tree__, NONE)) + : + children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view siblings() const noexcept + { + _C4RV(); + NodeData const *nd = tree_->get(id_); + return (nd->m_parent != NONE) ? // does it have a parent? + const_children_view(const_iterator(tree_, tree_->get(nd->m_parent)->m_first_child), const_iterator(tree_, NONE)) + : + const_children_view(end(), end()); + } + /** get an iterable view over all siblings (including the calling node) */ + C4_ALWAYS_INLINE C4_PURE const_children_view csiblings() const noexcept { return siblings(); } + + /** visit every child node calling fn(node) */ + template + C4_ALWAYS_INLINE C4_PURE bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node) */ + template + auto visit(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** visit every child node calling fn(node, level) */ + template + C4_ALWAYS_INLINE C4_PURE bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const noexcept + { + return detail::_visit_stacked(*(ConstImpl*)this, fn, indentation_level, skip_root); + } + /** visit every child node calling fn(node, level) */ + template + auto visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) noexcept + -> _C4_IF_MUTABLE(bool) + { + return detail::_visit_stacked(*(Impl*)this, fn, indentation_level, skip_root); + } + + /** @} */ + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + + #undef _C4_IF_MUTABLE + #undef _C4RV + #undef tree_ + #undef tree__ + #undef id_ + #undef id__ + + C4_SUPPRESS_WARNING_GCC_CLANG_POP +}; + +} // namespace detail + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT ConstNodeRef : public detail::RoNodeMethods +{ +public: + + using tree_type = Tree const; + +public: + + Tree const* C4_RESTRICT m_tree; + size_t m_id; + + friend NodeRef; + friend struct detail::RoNodeMethods; + +public: + + /** @name construction */ + /** @{ */ + + ConstNodeRef() : m_tree(nullptr), m_id(NONE) {} + ConstNodeRef(Tree const &t) : m_tree(&t), m_id(t .root_id()) {} + ConstNodeRef(Tree const *t) : m_tree(t ), m_id(t->root_id()) {} + ConstNodeRef(Tree const *t, size_t id) : m_tree(t), m_id(id) {} + ConstNodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE) {} + + ConstNodeRef(ConstNodeRef const&) = default; + ConstNodeRef(ConstNodeRef &&) = default; + + ConstNodeRef(NodeRef const&); + ConstNodeRef(NodeRef &&); + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + ConstNodeRef& operator= (std::nullptr_t) { m_tree = nullptr; m_id = NONE; return *this; } + + ConstNodeRef& operator= (ConstNodeRef const&) = default; + ConstNodeRef& operator= (ConstNodeRef &&) = default; + + ConstNodeRef& operator= (NodeRef const&); + ConstNodeRef& operator= (NodeRef &&); + + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + 
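    /* A minimal read-only traversal sketch (assuming the ryml::parse_in_arena()
     * entry point declared elsewhere in this header):
     *
     *   ryml::Tree t = ryml::parse_in_arena("{a: 1, b: 2}");
     *   ryml::ConstNodeRef root = t.crootref();
     *   size_t total = 0;
     *   for(ryml::ConstNodeRef child : root.children())
     *       total += child.key().len;   // keys are "a" and "b", so total == 2
     *   // root["missing"] fails an assertion (when assertions are enabled);
     *   // use root.has_child("missing") to test for presence first
     */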
C4_ALWAYS_INLINE C4_PURE bool valid() const noexcept { return m_tree != nullptr && m_id != NONE; } + + /** @} */ + +public: + + /** @name member getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE bool operator== (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (ConstNodeRef const& that) const noexcept { RYML_ASSERT(that.m_tree == m_tree); return ! this->operator==(that); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (std::nullptr_t) const noexcept { return m_tree == nullptr || m_id == NONE; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (std::nullptr_t) const noexcept { return ! this->operator== (nullptr); } + + C4_ALWAYS_INLINE C4_PURE bool operator== (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + C4_ALWAYS_INLINE C4_PURE bool operator!= (csubstr val) const noexcept { RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + /** @} */ + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a reference to a node in an existing yaml tree, offering a more + * convenient API than the index-based API used in the tree. */ +class RYML_EXPORT NodeRef : public detail::RoNodeMethods +{ +public: + + using tree_type = Tree; + using base_type = detail::RoNodeMethods; + +private: + + Tree *C4_RESTRICT m_tree; + size_t m_id; + + /** This member is used to enable lazy operator[] writing. When a child + * with a key or index is not found, m_id is set to the id of the parent + * and the asked-for key or index are stored in this member until a write + * does happen. Then it is given as key or index for creating the child. + * When a key is used, the csubstr stores it (so the csubstr's string is + * non-null and the csubstr's size is different from NONE). When an index is + * used instead, the csubstr's string is set to null, and only the csubstr's + * size is set to a value different from NONE. Otherwise, when operator[] + * does find the child then this member is empty: the string is null and + * the size is NONE. 
*/ + csubstr m_seed; + + friend ConstNodeRef; + friend struct detail::RoNodeMethods; + + // require valid: a helper macro, undefined at the end + #define _C4RV() \ + RYML_ASSERT(m_tree != nullptr); \ + _RYML_CB_ASSERT(m_tree->m_callbacks, m_id != NONE && !is_seed()) + +public: + + /** @name construction */ + /** @{ */ + + NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } + NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + + /** @} */ + +public: + + /** @name assignment */ + /** @{ */ + + NodeRef(NodeRef const&) = default; + NodeRef(NodeRef &&) = default; + + NodeRef& operator= (NodeRef const&) = default; + NodeRef& operator= (NodeRef &&) = default; + + /** @} */ + +public: + + /** @name state queries */ + /** @{ */ + + inline bool valid() const { return m_tree != nullptr && m_id != NONE; } + inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } + + inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } + + /** @} */ + +public: + + /** @name comparisons */ + /** @{ */ + + inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); } + + inline bool operator== (ConstNodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (ConstNodeRef const& that) const { return ! 
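+// --- reviewer note: illustrative sketch of the seed mechanism documented
+// above, not upstream code. operator[] with a missing key returns a "seed"
+// NodeRef; the child is only created in the tree once the seed is written to:
+//
+//     ryml::Tree t = ryml::parse_in_arena("{a: 1}");
+//     ryml::NodeRef n = t.rootref()["b"];  // not found: n is a seed
+//     // n.is_seed() == true; nothing was added to the tree yet
+//     n << 2;                              // now the child "b: 2" exists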
this->operator==(that); } + + inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } + inline bool operator!= (std::nullptr_t) const { return m_tree != nullptr && m_id != NONE && !is_seed(); } + + inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + C4_ALWAYS_INLINE C4_PURE Tree * tree() noexcept { return m_tree; } + C4_ALWAYS_INLINE C4_PURE Tree const* tree() const noexcept { return m_tree; } + + C4_ALWAYS_INLINE C4_PURE size_t id() const noexcept { return m_id; } + + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + + void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); } + void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); } + void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); } + void set_key_tag(csubstr key_tag) { _C4RV(); m_tree->set_key_tag(m_id, key_tag); } + void set_val_tag(csubstr val_tag) { _C4RV(); m_tree->set_val_tag(m_id, val_tag); } + void set_key_anchor(csubstr key_anchor) { _C4RV(); m_tree->set_key_anchor(m_id, key_anchor); } + void set_val_anchor(csubstr val_anchor) { _C4RV(); m_tree->set_val_anchor(m_id, val_anchor); } + void set_key_ref(csubstr key_ref) { _C4RV(); m_tree->set_key_ref(m_id, key_ref); } + void set_val_ref(csubstr val_ref) { _C4RV(); m_tree->set_val_ref(m_id, val_ref); } + + template + size_t set_key_serialized(T const& C4_RESTRICT k) + { + _C4RV(); + csubstr s = m_tree->to_arena(k); + m_tree->_set_key(m_id, s); + return s.len; + } + template + size_t set_val_serialized(T const& C4_RESTRICT v) + { + _C4RV(); + csubstr s = m_tree->to_arena(v); + m_tree->_set_val(m_id, s); + return s.len; + } + size_t set_val_serialized(std::nullptr_t) + { + _C4RV(); + m_tree->_set_val(m_id, csubstr{}); + return 0; + } + + /** encode a blob as base64, then assign the result to the node's key + * @return the size of base64-encoded blob */ + size_t set_key_serialized(fmt::const_base64_wrapper w); + /** encode a blob as base64, then assign the result to the node's val + * @return the size of base64-encoded blob */ + size_t set_val_serialized(fmt::const_base64_wrapper w); + +public: + + inline void clear() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + m_tree->_clear(m_id); + } + + inline void clear_key() + { + if(is_seed()) + return; + m_tree->_clear_key(m_id); + } + + inline void clear_val() + { + if(is_seed()) + return; + m_tree->_clear_val(m_id); + } + + inline void clear_children() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + } + + void create() { _apply_seed(); } + + inline void operator= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator|= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator= (NodeInit const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (NodeScalar const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (std::nullptr_t) + { + _apply_seed(); + _apply(csubstr{}); + } + + inline void operator= (csubstr v) + { + _apply_seed(); + _apply(v); + } + + template + inline void 
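+// --- reviewer note: illustrative sketch, not upstream code. set_key() and
+// set_val() store the given csubstr as-is (the pointed-to string must outlive
+// the tree), whereas the *_serialized() variants first format the value into
+// the tree arena:
+//
+//     ryml::Tree t = ryml::parse_in_arena("{}");
+//     ryml::NodeRef n = t.rootref().append_child();
+//     n.set_key("pi");             // stores a view of the string literal
+//     n.set_val_serialized(3.14);  // formatted and copied into the arena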
operator= (const char (&v)[N]) + { + _apply_seed(); + csubstr sv; + sv.assign(v); + _apply(sv); + } + + /** @} */ + +public: + + /** @name serialization */ + /** @{ */ + + /** serialize a variable to the arena */ + template + inline csubstr to_arena(T const& C4_RESTRICT s) + { + _C4RV(); + return m_tree->to_arena(s); + } + + /** serialize a variable, then assign the result to the node's val */ + inline NodeRef& operator<< (csubstr s) + { + // this overload is needed to prevent ambiguity (there's also + // operator<< for writing a substr to a stream) + _apply_seed(); + write(this, s); + RYML_ASSERT(val() == s); + return *this; + } + + template + inline NodeRef& operator<< (T const& C4_RESTRICT v) + { + _apply_seed(); + write(this, v); + return *this; + } + + /** serialize a variable, then assign the result to the node's key */ + template + inline NodeRef& operator<< (Key const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + /** serialize a variable, then assign the result to the node's key */ + template + inline NodeRef& operator<< (Key const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + NodeRef& operator<< (Key w) + { + set_key_serialized(w.wrapper); + return *this; + } + + NodeRef& operator<< (fmt::const_base64_wrapper w) + { + set_val_serialized(w); + return *this; + } + + /** @} */ + +private: + + void _apply_seed() + { + if(m_seed.str) // we have a seed key: use it to create the new child + { + //RYML_ASSERT(i.key.scalar.empty() || m_key == i.key.scalar || m_key.empty()); + m_id = m_tree->append_child(m_id); + m_tree->_set_key(m_id, m_seed); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else if(m_seed.len != NONE) // we have a seed index: create a child at that position + { + RYML_ASSERT(m_tree->num_children(m_id) == m_seed.len); + m_id = m_tree->append_child(m_id); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else + { + RYML_ASSERT(valid()); + } + } + + inline void _apply(csubstr v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeScalar const& v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeInit const& i) + { + m_tree->_set(m_id, i); + } + +public: + + /** @name modification of hierarchy */ + /** @{ */ + + inline NodeRef insert_child(NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_child(NodeInit const& i, NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + return r; + } + + inline NodeRef prepend_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + r._apply(i); + return r; + } + + inline NodeRef append_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + return r; + } + + inline NodeRef append_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + r._apply(i); + return r; + } + +public: + + inline NodeRef insert_sibling(ConstNodeRef const& after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_sibling(NodeInit const& i, ConstNodeRef const& after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef 
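+// --- reviewer note: illustrative sketch, not upstream code. operator<< goes
+// through the tree arena (via the write()/set_val_serialized() path above),
+// so temporaries and numeric values are safe to write when building a tree:
+//
+//     ryml::Tree t;
+//     ryml::NodeRef root = t.rootref();
+//     root |= ryml::MAP;
+//     root["name"] << "amalgam";
+//     root["version"] << 1.23;           // formatted into the arena
+//     ryml::NodeRef deps = root["deps"];
+//     deps |= ryml::SEQ;
+//     deps.append_child() << "c4core";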
r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + return r; + } + + inline NodeRef prepend_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + r._apply(i); + return r; + } + + inline NodeRef append_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + return r; + } + + inline NodeRef append_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + r._apply(i); + return r; + } + +public: + + inline void remove_child(NodeRef & child) + { + _C4RV(); + RYML_ASSERT(has_child(child)); + RYML_ASSERT(child.parent().id() == id()); + m_tree->remove(child.id()); + child.clear(); + } + + //! remove the nth child of this node + inline void remove_child(size_t pos) + { + _C4RV(); + RYML_ASSERT(pos >= 0 && pos < num_children()); + size_t child = m_tree->child(m_id, pos); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + + //! remove a child by name + inline void remove_child(csubstr key) + { + _C4RV(); + size_t child = m_tree->find_child(m_id, key); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + +public: + + /** change the node's position within its parent, placing it after + * @p after. To move to the first position in the parent, simply + * pass an empty or default-constructed reference like this: + * `n.move({})`. */ + inline void move(ConstNodeRef const& after) + { + _C4RV(); + m_tree->move(m_id, after.m_id); + } + + /** move the node to a different @p parent (which may belong to a + * different tree), placing it after @p after. When the + * destination parent is in a new tree, then this node's tree + * pointer is reset to the tree of the parent node. */ + inline void move(NodeRef const& parent, ConstNodeRef const& after) + { + _C4RV(); + if(parent.m_tree == m_tree) + { + m_tree->move(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->move(m_tree, m_id, parent.m_id, after.m_id); + m_tree = parent.m_tree; + } + } + + /** duplicate the current node somewhere within its parent, and + * place it after the node @p after. To place into the first + * position of the parent, simply pass an empty or + * default-constructed reference like this: `n.move({})`. */ + inline NodeRef duplicate(ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(m_tree == after.m_tree || after.m_id == NONE); + size_t dup = m_tree->duplicate(m_id, m_tree->parent(m_id), after.m_id); + NodeRef r(m_tree, dup); + return r; + } + + /** duplicate the current node somewhere into a different @p parent + * (possibly from a different tree), and place it after the node + * @p after. To place into the first position of the parent, + * simply pass an empty or default-constructed reference like + * this: `n.move({})`. 
*/ + inline NodeRef duplicate(NodeRef const& parent, ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree || after.m_id == NONE); + if(parent.m_tree == m_tree) + { + size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); + NodeRef r(m_tree, dup); + return r; + } + else + { + size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); + NodeRef r(parent.m_tree, dup); + return r; + } + } + + inline void duplicate_children(NodeRef const& parent, ConstNodeRef const& after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree); + if(parent.m_tree == m_tree) + { + m_tree->duplicate_children(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->duplicate_children(m_tree, m_id, parent.m_id, after.m_id); + } + } + + /** @} */ + +#undef _C4RV +}; + + +//----------------------------------------------------------------------------- + +inline ConstNodeRef::ConstNodeRef(NodeRef const& that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? that.id() : NONE) +{ +} + +inline ConstNodeRef::ConstNodeRef(NodeRef && that) + : m_tree(that.m_tree) + , m_id(!that.is_seed() ? that.id() : NONE) +{ +} + + +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef const& that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} + +inline ConstNodeRef& ConstNodeRef::operator= (NodeRef && that) +{ + m_tree = (that.m_tree); + m_id = (!that.is_seed() ? that.id() : NONE); + return *this; +} + + +//----------------------------------------------------------------------------- + +template +inline void write(NodeRef *n, T const& v) +{ + n->set_val_serialized(v); +} + +template +typename std::enable_if< ! std::is_floating_point::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars(n.val(), v); +} +template +typename std::enable_if< ! 
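+// --- reviewer note: illustrative sketch, not upstream code. operator<< and
+// operator>> dispatch to write()/read() free functions found via argument-
+// dependent lookup (the generic overloads are defined in this header), so a
+// user type can be made serializable by providing its own overloads; MyVec2
+// below is a hypothetical type:
+//
+//     struct MyVec2 { double x, y; };
+//     void write(ryml::NodeRef *n, MyVec2 const& v)
+//     {
+//         *n |= ryml::MAP;
+//         n->append_child() << ryml::key("x") << v.x;
+//         n->append_child() << ryml::key("y") << v.y;
+//     }
+//     bool read(ryml::ConstNodeRef const& n, MyVec2 *v)
+//     {
+//         n["x"] >> v->x;
+//         n["y"] >> v->y;
+//         return true;
+//     }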
std::is_floating_point::value, bool>::type +inline read(ConstNodeRef const& n, T *v) +{ + return from_chars(n.val(), v); +} + +template +typename std::enable_if::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars_float(n.val(), v); +} +template +typename std::enable_if::value, bool>::type +inline read(ConstNodeRef const& n, T *v) +{ + return from_chars_float(n.val(), v); +} + + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_YML_NODE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/writer.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_WRITER_HPP_ +#define _C4_YML_WRITER_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "./common.hpp" +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +//included above: +//#include // fwrite(), fputc() +//included above: +//#include // memcpy() + + +namespace c4 { +namespace yml { + + +/** Repeat-Character: a character to be written a number of times. */ +struct RepC +{ + char c; + size_t num_times; +}; +inline RepC indent_to(size_t num_levels) +{ + return {' ', size_t(2) * num_levels}; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to a file. Defaults to stdout. */ +struct WriterFile +{ + FILE * m_file; + size_t m_pos; + + WriterFile(FILE *f = nullptr) : m_file(f ? 
f : stdout), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + fwrite(a, sizeof(char), N - 1, m_file); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + fwrite(sp.str, sizeof(csubstr::char_type), sp.len, m_file); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + fputc(c, m_file); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + fputc(rc.c, m_file); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to an STL-like ostream. */ +template +struct WriterOStream +{ + OStream& m_stream; + size_t m_pos; + + WriterOStream(OStream &s) : m_stream(s), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + m_stream.write(a, N - 1); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + m_stream.write(sp.str, sp.len); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + m_stream.put(c); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_stream.put(rc.c); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** a writer to a substr */ +struct WriterBuf +{ + substr m_buf; + size_t m_pos; + + WriterBuf(substr sp) : m_buf(sp), m_pos(0) {} + + inline substr _get(bool error_on_excess) + { + if(m_pos <= m_buf.len) + { + return m_buf.first(m_pos); + } + if(error_on_excess) + { + c4::yml::error("not enough space in the given buffer"); + } + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + RYML_ASSERT( ! m_buf.overlaps(a)); + if(m_pos + N-1 <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), a, N-1); + } + m_pos += N-1; + } + + inline void _do_write(csubstr sp) + { + if(sp.empty()) return; + RYML_ASSERT( ! 
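+// --- reviewer note: illustrative sketch, not upstream code. When the target
+// buffer is too small and error_on_excess is false, WriterBuf keeps counting
+// but stops copying, and _get() returns a substr with a null str and len set
+// to the required size; emit_yaml() (defined further below) exposes this for
+// a two-pass emit over a hypothetical, previously built tree:
+//
+//     char small[8];
+//     ryml::substr out = ryml::emit_yaml(tree,
+//                                        ryml::substr(small, sizeof(small)),
+//                                        /*error_on_excess*/false);
+//     if(out.str == nullptr && out.len > 0)
+//     {
+//         // allocate out.len bytes and emit again
+//     }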
sp.overlaps(m_buf)); + if(m_pos + sp.len <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), sp.str, sp.len); + } + m_pos += sp.len; + } + + inline void _do_write(const char c) + { + if(m_pos + 1 <= m_buf.len) + { + m_buf[m_pos] = c; + } + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + if(m_pos + rc.num_times <= m_buf.len) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_buf[m_pos + i] = rc.c; + } + } + m_pos += rc.num_times; + } +}; + + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_WRITER_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/parser_dbg.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#define _C4_YML_DETAIL_PARSER_DBG_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "../common.hpp" +#endif +//included above: +//#include + +//----------------------------------------------------------------------------- +// some debugging scaffolds + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4068/*unknown pragma*/) +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +//#pragma GCC diagnostic ignored "-Wpragma-system-header-outside-header" +#pragma GCC system_header + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Werror" +#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" + +// some debugging scaffolds +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + +namespace c4 { +inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }; +template +void _dbg_printf(c4::csubstr fmt, Args&& ...args) +{ + static char writebuf[256]; + auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); + } + } +} +} // namespace c4 + +# define _c4dbgt(fmt, ...) this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) +# define _c4dbgq(msg) _dbg_printf(msg "\n") +# define _c4err(fmt, ...) \ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) +#else +# define _c4dbgt(fmt, ...) +# define _c4dbgpf(fmt, ...) +# define _c4dbgp(msg) +# define _c4dbgq(msg) +# define _c4err(fmt, ...) 
\ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0) +#endif + +#define _c4prsp(sp) sp +#define _c4presc(s) __c4presc(s.str, s.len) +inline c4::csubstr _c4prc(const char &C4_RESTRICT c) +{ + switch(c) + { + case '\n': return c4::csubstr("\\n"); + case '\t': return c4::csubstr("\\t"); + case '\0': return c4::csubstr("\\0"); + case '\r': return c4::csubstr("\\r"); + case '\f': return c4::csubstr("\\f"); + case '\b': return c4::csubstr("\\b"); + case '\v': return c4::csubstr("\\v"); + case '\a': return c4::csubstr("\\a"); + default: return c4::csubstr(&c, 1); + } +} +inline void __c4presc(const char *s, size_t len) +{ + size_t prev = 0; + for(size_t i = 0; i < len; ++i) + { + switch(s[i]) + { + case '\n' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break; + case '\t' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('t'); prev = i+1; break; + case '\0' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('0'); prev = i+1; break; + case '\r' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('r'); prev = i+1; break; + case '\f' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('f'); prev = i+1; break; + case '\b' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('b'); prev = i+1; break; + case '\v' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('v'); prev = i+1; break; + case '\a' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('a'); prev = i+1; break; + case '\x1b': fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('e'); prev = i+1; break; + case -0x3e/*0xc2u*/: + if(i+1 < len) + { + if(s[i+1] == -0x60/*0xa0u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('_'); prev = i+2; ++i; + } + else if(s[i+1] == -0x7b/*0x85u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('N'); prev = i+2; ++i; + } + break; + } + case -0x1e/*0xe2u*/: + if(i+2 < len && s[i+1] == -0x80/*0x80u*/) + { + if(s[i+2] == -0x58/*0xa8u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('L'); prev = i+3; i += 2; + } + else if(s[i+2] == -0x57/*0xa9u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('P'); prev = i+3; i += 2; + } + break; + } + } + } + fwrite(s + prev, 1, len - prev, stdout); +} + +#pragma clang diagnostic pop +#pragma GCC diagnostic pop + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + + +#endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp) + +#define C4_YML_EMIT_DEF_HPP_ + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/emit.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_EMIT_HPP_ +#define _C4_YML_EMIT_HPP_ + +#ifndef _C4_YML_WRITER_HPP_ +#include "./writer.hpp" +#endif + +#ifndef _C4_YML_TREE_HPP_ +#include "./tree.hpp" +#endif + +#ifndef _C4_YML_NODE_HPP_ +#include "./node.hpp" +#endif + + +#define RYML_DEPRECATE_EMIT \ + RYML_DEPRECATED("use emit_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") +#ifdef emit +#error "emit is defined, likely from a Qt include. This will cause a compilation error. 
See https://github.com/biojppm/rapidyaml/issues/120" +#endif +#define RYML_DEPRECATE_EMITRS \ + RYML_DEPRECATED("use emitrs_yaml() instead. See https://github.com/biojppm/rapidyaml/issues/120") + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace c4 { +namespace yml { + +template class Emitter; + +template +using EmitterOStream = Emitter>; +using EmitterFile = Emitter; +using EmitterBuf = Emitter; + +typedef enum { + EMIT_YAML = 0, + EMIT_JSON = 1 +} EmitType_e; + + +/** mark a tree or node to be emitted as json */ +struct as_json +{ + Tree const* tree; + size_t node; + as_json(Tree const& t) : tree(&t), node(t.empty() ? NONE : t.root_id()) {} + as_json(Tree const& t, size_t id) : tree(&t), node(id) {} + as_json(ConstNodeRef const& n) : tree(n.tree()), node(n.id()) {} +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +class Emitter : public Writer +{ +public: + + using Writer::Writer; + + /** emit! + * + * When writing to a buffer, returns a substr of the emitted YAML. + * If the given buffer has insufficient space, the returned span will + * be null and its size will be the needed space. No writes are done + * after the end of the buffer. + * + * When writing to a file, the returned substr will be null, but its + * length will be set to the number of bytes written. */ + substr emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + /** emit starting at the root node */ + substr emit_as(EmitType_e type, Tree const& t, bool error_on_excess=true); + /** emit the given node */ + substr emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess=true); + +private: + + Tree const* C4_RESTRICT m_tree; + + void _emit_yaml(size_t id); + void _do_visit_flow_sl(size_t id, size_t ilevel=0); + void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent); + void _do_visit_json(size_t id); + +private: + + void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level); + void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags); + + void _write_doc(size_t id); + void _write_scalar(csubstr s, bool was_quoted); + void _write_scalar_json(csubstr s, bool as_key, bool was_quoted); + void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false); + void _write_scalar_folded(csubstr s, size_t level, bool as_key); + void _write_scalar_squo(csubstr s, size_t level); + void _write_scalar_dquo(csubstr s, size_t level); + void _write_scalar_plain(csubstr s, size_t level); + + void _write_tag(csubstr tag) + { + if(!tag.begins_with('!')) + this->Writer::_do_write('!'); + this->Writer::_do_write(tag); + } + + enum : type_bits { + _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _keysc_json = (KEY) | ~(VAL), + _valsc_json = ~(KEY) | (VAL), + }; + + C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { 
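+// --- reviewer note: illustrative sketch, not upstream code. The convenience
+// helpers below wrap this Emitter with a FILE*, ostream or buffer writer
+// (std::cout assumes <iostream> is included by the caller):
+//
+//     ryml::Tree t = ryml::parse_in_arena("{a: 1, b: [2, 3]}");
+//     ryml::emit_yaml(t);             // YAML to stdout (null FILE* default)
+//     std::cout << t;                 // YAML to an ostream
+//     std::cout << ryml::as_json(t);  // JSON to an ostream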
_write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); } + C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); } + + C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } + C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_yaml(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, size_t id, FILE *f) +{ + return emit_yaml(t, id, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, t, id, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_yaml(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, t, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(Tree const& t, FILE *f=nullptr) +{ + return emit_yaml(t, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, t, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_yaml(ConstNodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_YAML, r, /*error_on_excess*/true).len; +} +RYML_DEPRECATE_EMIT inline size_t emit(ConstNodeRef const& r, FILE *f=nullptr) +{ + return emit_yaml(r, f); +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(ConstNodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit_as(EMIT_JSON, r, /*error_on_excess*/true).len; +} + + +//----------------------------------------------------------------------------- + +/** emit YAML to an STL-like ostream */ +template +inline OStream& operator<< (OStream& s, Tree const& t) +{ + EmitterOStream em(s); + em.emit_as(EMIT_YAML, t); + return s; +} + +/** emit YAML to an STL-like ostream + * @overload */ +template +inline OStream& operator<< (OStream& s, ConstNodeRef const& n) +{ + EmitterOStream em(s); + em.emit_as(EMIT_YAML, n); + return s; +} + +/** emit json to an STL-like stream */ +template +inline OStream& operator<< (OStream& s, as_json const& j) +{ + EmitterOStream em(s); + em.emit_as(EMIT_JSON, *j.tree, j.node, true); + return s; +} + + +//----------------------------------------------------------------------------- + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. 
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_yaml(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, t, id, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, id, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, t, id, error_on_excess); +} + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_yaml(Tree const& t, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, t, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(Tree const& t, substr buf, bool error_on_excess=true) +{ + return emit_yaml(t, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload */ +inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, t, error_on_excess); +} + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload + */ +inline substr emit_yaml(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_YAML, r, error_on_excess); +} +RYML_DEPRECATE_EMIT inline substr emit(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + return emit_yaml(r, buf, error_on_excess); +} + +/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. + * @overload + */ +inline substr emit_json(ConstNodeRef const& r, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit_as(EMIT_JSON, r, error_on_excess); +} + + +//----------------------------------------------------------------------------- + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template +substr emitrs_yaml(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit_yaml(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit_yaml(t, id, buf, /*error_on_excess*/true); + } + return ret; +} +template +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +{ + return emitrs_yaml(t, id, cont); +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. 
*/ +template +substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit_json(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit_json(t, id, buf, /*error_on_excess*/true); + } + return ret; +} + + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template +CharOwningContainer emitrs_yaml(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_yaml(t, id, &c); + return c; +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_yaml(t, id, &c); + return c; +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template +CharOwningContainer emitrs_json(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_json(t, id, &c); + return c; +} + + +/** emit+resize: YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template +substr emitrs_yaml(Tree const& t, CharOwningContainer * cont) +{ + if(t.empty()) + return {}; + return emitrs_yaml(t, t.root_id(), cont); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(Tree const& t, CharOwningContainer * cont) +{ + return emitrs_yaml(t, cont); +} + +/** emit+resize: JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template +substr emitrs_json(Tree const& t, CharOwningContainer * cont) +{ + if(t.empty()) + return {}; + return emitrs_json(t, t.root_id(), cont); +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. */ +template +CharOwningContainer emitrs_yaml(Tree const& t) +{ + CharOwningContainer c; + if(t.empty()) + return c; + emitrs_yaml(t, t.root_id(), &c); + return c; +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(Tree const& t) +{ + return emitrs_yaml(t); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template +CharOwningContainer emitrs_json(Tree const& t) +{ + CharOwningContainer c; + if(t.empty()) + return c; + emitrs_json(t, t.root_id(), &c); + return c; +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. */ +template +substr emitrs_yaml(ConstNodeRef const& n, CharOwningContainer * cont) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return emitrs_yaml(*n.tree(), n.id(), cont); +} +template +RYML_DEPRECATE_EMITRS substr emitrs(ConstNodeRef const& n, CharOwningContainer * cont) +{ + return emitrs_yaml(n, cont); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template +substr emitrs_json(ConstNodeRef const& n, CharOwningContainer * cont) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return emitrs_json(*n.tree(), n.id(), cont); +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. 
*/ +template +CharOwningContainer emitrs_yaml(ConstNodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs_yaml(*n.tree(), n.id(), &c); + return c; +} +template +RYML_DEPRECATE_EMITRS CharOwningContainer emitrs(ConstNodeRef const& n) +{ + return emitrs_yaml(n); +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template +CharOwningContainer emitrs_json(ConstNodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs_json(*n.tree(), n.id(), &c); + return c; +} + +} // namespace yml +} // namespace c4 + +#undef RYML_DEPRECATE_EMIT +#undef RYML_DEPRECATE_EMITRS + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp +//#include "c4/yml/emit.def.hpp" +#if !defined(C4_YML_EMIT_DEF_HPP_) && !defined(_C4_YML_EMIT_DEF_HPP_) +#error "amalgamate: file c4/yml/emit.def.hpp must have been included at this point" +#endif /* C4_YML_EMIT_DEF_HPP_ */ + + +#endif /* _C4_YML_EMIT_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/emit.def.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_EMIT_DEF_HPP_ +#define _C4_YML_EMIT_DEF_HPP_ + +#ifndef _C4_YML_EMIT_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp +//#include "c4/yml/emit.hpp" +#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_) +#error "amalgamate: file c4/yml/emit.hpp must have been included at this point" +#endif /* C4_YML_EMIT_HPP_ */ + +#endif + +namespace c4 { +namespace yml { + +template +substr Emitter::emit_as(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +{ + if(t.empty()) + { + _RYML_CB_ASSERT(t.callbacks(), id == NONE); + return {}; + } + _RYML_CB_CHECK(t.callbacks(), id < t.size()); + m_tree = &t; + if(type == EMIT_YAML) + _emit_yaml(id); + else if(type == EMIT_JSON) + _do_visit_json(id); + else + _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type"); + return this->Writer::_get(error_on_excess); +} + +template +substr Emitter::emit_as(EmitType_e type, Tree const& t, bool error_on_excess) +{ + if(t.empty()) + return {}; + return this->emit_as(type, t, t.root_id(), error_on_excess); +} + +template +substr Emitter::emit_as(EmitType_e type, ConstNodeRef const& n, bool error_on_excess) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return this->emit_as(type, *n.tree(), n.id(), error_on_excess); +} + + +//----------------------------------------------------------------------------- + +template +void Emitter::_emit_yaml(size_t id) +{ + // save branches in the visitor by doing the initial stream/doc + // logic here, sparing the need to check stream/val/keyval inside + // the visitor functions + auto dispatch = [this](size_t node){ + NodeType ty = m_tree->type(node); + if(ty.marked_flow_sl()) + _do_visit_flow_sl(node, 0); + else if(ty.marked_flow_ml()) + _do_visit_flow_ml(node, 0); + else + { + _do_visit_block(node, 0); + } + }; + if(!m_tree->is_root(id)) + { + if(m_tree->is_container(id) && 
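+// --- reviewer note: illustrative sketch, not upstream code. The emitrs_*()
+// helpers above run the emit / check-needed-size / resize / re-emit loop for
+// any string-like container (using std::string assumes the c4 std interop
+// headers are available; t is a hypothetical tree):
+//
+//     std::string yaml = ryml::emitrs_yaml<std::string>(t);
+//     std::string json = ryml::emitrs_json<std::string>(t);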
!m_tree->type(id).marked_flow()) + { + size_t ilevel = 0; + if(m_tree->has_key(id)) + { + this->Writer::_do_write(m_tree->key(id)); + this->Writer::_do_write(":\n"); + ++ilevel; + } + _do_visit_block_container(id, ilevel, ilevel); + return; + } + } + + auto *btd = m_tree->tag_directives().b; + auto *etd = m_tree->tag_directives().e; + auto write_tag_directives = [&btd, etd, this](size_t next_node){ + auto end = btd; + while(end < etd) + { + if(end->next_node_id > next_node) + break; + ++end; + } + for( ; btd != end; ++btd) + { + if(next_node != m_tree->first_child(m_tree->parent(next_node))) + this->Writer::_do_write("...\n"); + this->Writer::_do_write("%TAG "); + this->Writer::_do_write(btd->handle); + this->Writer::_do_write(' '); + this->Writer::_do_write(btd->prefix); + this->Writer::_do_write('\n'); + } + }; + if(m_tree->is_stream(id)) + { + if(m_tree->first_child(id) != NONE) + write_tag_directives(m_tree->first_child(id)); + for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child)) + { + dispatch(child); + if(m_tree->next_sibling(child) != NONE) + write_tag_directives(m_tree->next_sibling(child)); + } + } + else if(m_tree->is_container(id)) + { + dispatch(id); + } + else if(m_tree->is_doc(id)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_container(id)); // checked above + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_val(id)); // so it must be a val + _write_doc(id); + } + else if(m_tree->is_keyval(id)) + { + _writek(id, 0); + this->Writer::_do_write(": "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->is_val(id)) + { + //this->Writer::_do_write("- "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->type(id) == NOTYPE) + { + ; + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "unknown type"); + } +} + +template +void Emitter::_write_doc(size_t id) +{ + RYML_ASSERT(m_tree->is_doc(id)); + if(!m_tree->is_root(id)) + { + RYML_ASSERT(m_tree->is_stream(m_tree->parent(id))); + this->Writer::_do_write("---"); + } + if(!m_tree->has_val(id)) // this is more frequent + { + if(m_tree->has_val_tag(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(id)); + } + if(m_tree->has_val_anchor(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(id)); + } + } + else // docval + { + RYML_ASSERT(m_tree->has_val(id)); + RYML_ASSERT(!m_tree->has_key(id)); + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _writev(id, 0); + } + this->Writer::_do_write('\n'); +} + +template +void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + + if(m_tree->has_key(node)) + { + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + 
this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + } + + if(spc) + this->Writer::_do_write(' '); + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('{'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node)); + this->Writer::_do_write('['); + } + } // container + + for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) + { + if(count++) + this->Writer::_do_write(','); + if(m_tree->is_keyval(child)) + { + _writek(child, ilevel); + this->Writer::_do_write(": "); + _writev(child, ilevel); + } + else if(m_tree->is_val(child)) + { + _writev(child, ilevel); + } + else + { + // with single-line flow, we can never go back to block + _do_visit_flow_sl(child, ilevel + 1); + } + } + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('}'); + } + else if(m_tree->is_seq(node)) + { + this->Writer::_do_write(']'); + } +} + +template +void Emitter::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent) +{ + C4_UNUSED(id); + C4_UNUSED(ilevel); + C4_UNUSED(do_indent); + RYML_CHECK(false/*not implemented*/); +} + +template +void Emitter::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent) +{ + RepC ind = indent_to(do_indent * next_level); + + if(m_tree->is_seq(node)) + { + for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); + if(m_tree->is_val(child)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _writev(child, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child)); + NodeType ty = m_tree->type(child); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_sl(child, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_ml(child, next_level, do_indent); + this->Writer::_do_write('\n'); + } + else + { + _do_visit_block(child, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } + else // map + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node)); + for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); + if(m_tree->is_keyval(ich)) + { + this->Writer::_do_write(ind); + _writek(ich, next_level); + this->Writer::_do_write(": "); + _writev(ich, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich)); + NodeType ty = m_tree->type(ich); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + _do_visit_flow_sl(ich, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + _do_visit_flow_ml(ich, 0u); + this->Writer::_do_write('\n'); + } + else + { + _do_visit_block(ich, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } +} + +template +void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_indent) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || 
m_tree->parent_is_seq(node))); + RepC ind = indent_to(do_indent * ilevel); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + bool nl = false; // write a newline + + if(m_tree->has_key(node)) + { + this->Writer::_do_write(ind); + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + else if(!m_tree->is_root(node)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write('-'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + nl = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + nl = true; + } + + if(m_tree->has_children(node)) + { + if(m_tree->has_key(node)) + nl = true; + else + if(!m_tree->is_root(node) && !nl) + spc = true; + } + else + { + if(m_tree->is_seq(node)) + this->Writer::_do_write(" []\n"); + else if(m_tree->is_map(node)) + this->Writer::_do_write(" {}\n"); + return; + } + + if(spc && !nl) + this->Writer::_do_write(' '); + + do_indent = 0; + if(nl) + { + this->Writer::_do_write('\n'); + do_indent = 1; + } + } // container + + size_t next_level = ilevel + 1; + if(m_tree->is_root(node) || m_tree->is_doc(node)) + next_level = ilevel; // do not indent at top level + + _do_visit_block_container(node, next_level, do_indent); +} + +template +void Emitter::_do_visit_json(size_t id) +{ + _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams + if(m_tree->is_keyval(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + _writev_json(id); + } + else if(m_tree->is_val(id)) + { + _writev_json(id); + } + else if(m_tree->is_container(id)) + { + if(m_tree->has_key(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + } + if(m_tree->is_seq(id)) + this->Writer::_do_write('['); + else if(m_tree->is_map(id)) + this->Writer::_do_write('{'); + } // container + + for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) + { + if(ich != m_tree->first_child(id)) + this->Writer::_do_write(','); + _do_visit_json(ich); + } + + if(m_tree->is_seq(id)) + this->Writer::_do_write(']'); + else if(m_tree->is_map(id)) + this->Writer::_do_write('}'); +} + +template +void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel) +{ + if( ! sc.tag.empty()) + { + _write_tag(sc.tag); + this->Writer::_do_write(' '); + } + if(flags.has_anchor()) + { + RYML_ASSERT(flags.is_ref() != flags.has_anchor()); + RYML_ASSERT( ! 
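+// --- reviewer note: illustrative sketch, not upstream code. In block mode each
+// nesting level is indented by two spaces (see indent_to() in the writer
+// header), sequence children get a "- " prefix, and childless containers are
+// emitted inline, so a nested tree comes out roughly as:
+//
+//     map:
+//       seq:
+//         - 1
+//         - 2
+//       empty: {}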
sc.anchor.empty()); + this->Writer::_do_write('&'); + this->Writer::_do_write(sc.anchor); + this->Writer::_do_write(' '); + } + else if(flags.is_ref()) + { + if(sc.anchor != "<<") + this->Writer::_do_write('*'); + this->Writer::_do_write(sc.anchor); + return; + } + + // ensure the style flags only have one of KEY or VAL + _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0))); + + auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE); + if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL)) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED)) + { + _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO)) + { + _write_scalar_squo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO)) + { + _write_scalar_dquo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN)) + { + _write_scalar_plain(sc.scalar, ilevel); + } + else if(!style_marks) + { + size_t first_non_nl = sc.scalar.first_not_of('\n'); + bool all_newlines = first_non_nl == npos; + bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t"); + bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty())); + if(do_literal) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + } + else + { + for(size_t i = 0; i < sc.scalar.len; ++i) + { + if(sc.scalar.str[i] == '\n') + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + goto wrote_special; + } + // todo: check for escaped characters requiring double quotes + } + _write_scalar(sc.scalar, flags.is_quoted()); + wrote_special: + ; + } + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "not implemented"); + } +} +template +void Emitter::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags) +{ + if(C4_UNLIKELY( ! sc.tag.empty())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); + if(C4_UNLIKELY(flags.has_anchor())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); + _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted()); +} + +#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); } + +template +void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation) +{ + if(explicit_key) + this->Writer::_do_write("? 
"); + csubstr trimmed = s.trimr("\n\r"); + size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r'); + // + if(!explicit_indentation) + this->Writer::_do_write('|'); + else + this->Writer::_do_write("|2"); + // + if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/) + this->Writer::_do_write("+\n"); + else if(numnewlines_at_end == 1) + this->Writer::_do_write('\n'); + else + this->Writer::_do_write("-\n"); + // + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + pos = i+1; // already written + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template +void Emitter::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key) +{ + if(explicit_key) + { + this->Writer::_do_write("? "); + } + RYML_ASSERT(s.find("\r") == csubstr::npos); + csubstr trimmed = s.trimr('\n'); + size_t numnewlines_at_end = s.len - trimmed.len; + if(numnewlines_at_end == 0) + { + this->Writer::_do_write(">-\n"); + } + else if(numnewlines_at_end == 1) + { + this->Writer::_do_write(">\n"); + } + else if(numnewlines_at_end > 1) + { + this->Writer::_do_write(">+\n"); + } + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + pos = i+1; // because of the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + this->Writer::_do_write('\n'); // write the newline twice + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template +void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('\''); + for(size_t i = 0; i < s.len; ++i) + { + if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this char + this->Writer::_do_write('\n'); // write the character again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + else if(s[i] == '\'') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this char + this->Writer::_do_write('\''); // write the character again + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + this->Writer::_do_write(s.sub(pos)); + 
this->Writer::_do_write('\''); +} + +template +void Emitter::_write_scalar_dquo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + if(curr == '"' || curr == '\\') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // write the escape + this->Writer::_do_write(curr); // write the char + pos = i+1; + } + else if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + if(i+1 < s.len) // escape leading whitespace after the newline + { + const char next = s.str[i+1]; + if(next == ' ' || next == '\t') + this->Writer::_do_write('\\'); + } + } + else if(curr == ' ' || curr == '\t') + { + // escape trailing whitespace before a newline + size_t next = s.first_not_of(" \t\r", i); + if(next != npos && s[next] == '\n') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // escape the whitespace + pos = i; + } + } + else if(C4_UNLIKELY(curr == '\r')) + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write("\\r"); // write the escaped char + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); +} + +template +void Emitter::_write_scalar_plain(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + if(curr == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } +} + +#undef _rymlindent_nextline + +template +void Emitter::_write_scalar(csubstr s, bool was_quoted) +{ + // this block of code needed to be moved to before the needs_quotes + // assignment to work around a g++ optimizer bug where (s.str != nullptr) + // was evaluated as true even if s.str was actually a nullptr (!!!) + if(s.len == size_t(0)) + { + if(was_quoted || s.str != nullptr) + this->Writer::_do_write("''"); + return; + } + + const bool needs_quotes = ( + was_quoted + || + ( + ( ! s.is_number()) + && + ( + // has leading whitespace + // looks like reference or anchor + // would be treated as a directive + // see https://www.yaml.info/learn/quote.html#noplain + s.begins_with_any(" \n\t\r*&%@`") + || + s.begins_with("<<") + || + // has trailing whitespace + s.ends_with_any(" \n\t\r") + || + // has special chars + (s.first_of("#:-?,\n{}[]'\"") != npos) + ) + ) + ); + + if( ! 
needs_quotes) + { + this->Writer::_do_write(s); + } + else + { + const bool has_dquotes = s.first_of( '"') != npos; + const bool has_squotes = s.first_of('\'') != npos; + if(!has_squotes && has_dquotes) + { + this->Writer::_do_write('\''); + this->Writer::_do_write(s); + this->Writer::_do_write('\''); + } + else if(has_squotes && !has_dquotes) + { + RYML_ASSERT(s.count('\n') == 0); + this->Writer::_do_write('"'); + this->Writer::_do_write(s); + this->Writer::_do_write('"'); + } + else + { + _write_scalar_squo(s, /*FIXME FIXME FIXME*/0); + } + } +} +template +void Emitter::_write_scalar_json(csubstr s, bool as_key, bool use_quotes) +{ + if((!use_quotes) + // json keys require quotes + && (!as_key) + && ( + // do not quote special cases + (s == "true" || s == "false" || s == "null") + || ( + // do not quote numbers + (s.is_number() + && ( + // quote integral numbers if they have a leading 0 + // https://github.com/biojppm/rapidyaml/issues/291 + (!(s.len > 1 && s.begins_with('0'))) + // do not quote reals with leading 0 + // https://github.com/biojppm/rapidyaml/issues/313 + || (s.find('.') != csubstr::npos) )) + ) + ) + ) + { + this->Writer::_do_write(s); + } + else + { + size_t pos = 0; + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + switch(s.str[i]) + { + case '"': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\""); + pos = i + 1; + break; + case '\n': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\n"); + pos = i + 1; + break; + case '\t': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\t"); + pos = i + 1; + break; + case '\\': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\\\"); + pos = i + 1; + break; + case '\r': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\r"); + pos = i + 1; + break; + case '\b': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\b"); + pos = i + 1; + break; + case '\f': + this->Writer ::_do_write(s.range(pos, i)); + this->Writer ::_do_write("\\f"); + pos = i + 1; + break; + } + } + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); + } +} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_EMIT_DEF_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/stack.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +#define _C4_YML_DETAIL_STACK_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +//included above: +//#include "../common.hpp" +#endif + +#ifdef RYML_DBG +//included above: +//# include +#endif + +//included above: +//#include + +namespace c4 { +namespace yml { +namespace detail { + +/** A lightweight contiguous stack with SSO. This avoids a dependency on std. 
*/
+template<class T, size_t N=16>
+class stack
+{
+    static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
+    static_assert(std::is_trivially_destructible<T>::value, "T must be trivially destructible");
+
+    enum : size_t { sso_size = N };
+
+public:
+
+    T m_buf[N];
+    T * m_stack;
+    size_t m_size;
+    size_t m_capacity;
+    Callbacks m_callbacks;
+
+public:
+
+    constexpr static bool is_contiguous() { return true; }
+
+    stack(Callbacks const& cb)
+        : m_buf()
+        , m_stack(m_buf)
+        , m_size(0)
+        , m_capacity(N)
+        , m_callbacks(cb) {}
+    stack() : stack(get_callbacks()) {}
+    ~stack()
+    {
+        _free();
+    }
+
+    stack(stack const& that) noexcept : stack(that.m_callbacks)
+    {
+        resize(that.m_size);
+        _cp(&that);
+    }
+
+    stack(stack &&that) noexcept : stack(that.m_callbacks)
+    {
+        _mv(&that);
+    }
+
+    stack& operator= (stack const& that) noexcept
+    {
+        _cb(that.m_callbacks);
+        resize(that.m_size);
+        _cp(&that);
+        return *this;
+    }
+
+    stack& operator= (stack &&that) noexcept
+    {
+        _cb(that.m_callbacks);
+        _mv(&that);
+        return *this;
+    }
+
+public:
+
+    size_t size() const { return m_size; }
+    size_t empty() const { return m_size == 0; }
+    size_t capacity() const { return m_capacity; }
+
+    void clear()
+    {
+        m_size = 0;
+    }
+
+    void resize(size_t sz)
+    {
+        reserve(sz);
+        m_size = sz;
+    }
+
+    void reserve(size_t sz);
+
+    void push(T const& C4_RESTRICT n)
+    {
+        RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity);
+        if(m_size == m_capacity)
+        {
+            size_t cap = m_capacity == 0 ? N : 2 * m_capacity;
+            reserve(cap);
+        }
+        m_stack[m_size] = n;
+        ++m_size;
+    }
+
+    void push_top()
+    {
+        RYML_ASSERT(m_size > 0);
+        if(m_size == m_capacity)
+        {
+            size_t cap = m_capacity == 0 ? N : 2 * m_capacity;
+            reserve(cap);
+        }
+        m_stack[m_size] = m_stack[m_size - 1];
+        ++m_size;
+    }
+
+    T const& C4_RESTRICT pop()
+    {
+        RYML_ASSERT(m_size > 0);
+        --m_size;
+        return m_stack[m_size];
+    }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; }
+    C4_ALWAYS_INLINE T & C4_RESTRICT top() { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; }
+    C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { RYML_ASSERT(m_size > 0); return m_stack[0]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; }
+    C4_ALWAYS_INLINE T & C4_RESTRICT top(size_t i) { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; }
+    C4_ALWAYS_INLINE T & C4_RESTRICT bottom(size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; }
+
+    C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; }
+    C4_ALWAYS_INLINE T & C4_RESTRICT operator[](size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; }
+
+public:
+
+    using iterator = T *;
+    using const_iterator = T const *;
+
+    iterator begin() { return m_stack; }
+    iterator end () { return m_stack + m_size; }
+
+    const_iterator begin() const { return (const_iterator)m_stack; }
+    const_iterator end () const { return (const_iterator)m_stack + m_size; }
+
+public:
+    void _free();
+    void _cp(stack const* C4_RESTRICT that);
+    void _mv(stack * that);
+    void _cb(Callbacks const& cb);
+};
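+// [editor's note] Illustrative sketch, not part of the upstream rapidyaml sources:
+// how the small-size optimization above behaves. The element type and size chosen
+// below are arbitrary placeholders for the example.
+//
+//     detail::stack<int, 4> st;    // storage starts in the in-object buffer m_buf (capacity 4)
+//     for(int i = 0; i < 4; ++i)
+//         st.push(i);              // no heap allocation yet: everything fits in m_buf
+//     st.push(4);                  // capacity doubles to 8 through m_callbacks.m_allocate,
+//                                  // and the existing elements are memcpy()'d to the new block
+//     int v = st.top();            // v == 4; pop()/clear() only move m_size, never shrink storage
+//
+// Any heap block is released through m_callbacks.m_free in _free(), which runs on
+// destruction and when the callbacks are replaced via _cb().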
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+template<class T, size_t N>
+void stack<T, N>::reserve(size_t sz)
+{
+    if(sz <= m_size)
+        return;
+    if(sz <= N)
+    {
+        m_stack = m_buf;
+        m_capacity = N;
+        return;
+    }
+    T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data);
+    memcpy(buf, m_stack, m_size * sizeof(T));
+    if(m_stack != m_buf)
+    {
+        m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data);
+    }
+    m_stack = buf;
+    m_capacity = sz;
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class T, size_t N>
+void stack<T, N>::_free()
+{
+    RYML_ASSERT(m_stack != nullptr); // this structure cannot be memset() to zero
+    if(m_stack != m_buf)
+    {
+        m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data);
+        m_stack = m_buf;
+        m_size = N;
+        m_capacity = N;
+    }
+    else
+    {
+        RYML_ASSERT(m_capacity == N);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class T, size_t N>
+void stack<T, N>::_cp(stack const* C4_RESTRICT that)
+{
+    if(that->m_stack != that->m_buf)
+    {
+        RYML_ASSERT(that->m_capacity > N);
+        RYML_ASSERT(that->m_size <= that->m_capacity);
+    }
+    else
+    {
+        RYML_ASSERT(that->m_capacity <= N);
+        RYML_ASSERT(that->m_size <= that->m_capacity);
+    }
+    memcpy(m_stack, that->m_stack, that->m_size * sizeof(T));
+    m_size = that->m_size;
+    m_capacity = that->m_size < N ? N : that->m_size;
+    m_callbacks = that->m_callbacks;
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class T, size_t N>
+void stack<T, N>::_mv(stack * that)
+{
+    if(that->m_stack != that->m_buf)
+    {
+        RYML_ASSERT(that->m_capacity > N);
+        RYML_ASSERT(that->m_size <= that->m_capacity);
+        m_stack = that->m_stack;
+    }
+    else
+    {
+        RYML_ASSERT(that->m_capacity <= N);
+        RYML_ASSERT(that->m_size <= that->m_capacity);
+        memcpy(m_buf, that->m_buf, that->m_size * sizeof(T));
+        m_stack = m_buf;
+    }
+    m_size = that->m_size;
+    m_capacity = that->m_capacity;
+    m_callbacks = that->m_callbacks;
+    // make sure no deallocation happens on destruction
+    RYML_ASSERT(that->m_stack != m_buf);
+    that->m_stack = that->m_buf;
+    that->m_capacity = N;
+    that->m_size = 0;
+}
+
+
+//-----------------------------------------------------------------------------
+
+template<class T, size_t N>
+void stack<T, N>::_cb(Callbacks const& cb)
+{
+    if(cb != m_callbacks)
+    {
+        _free();
+        m_callbacks = cb;
+    }
+}
+
+} // namespace detail
+} // namespace yml
+} // namespace c4
+
+#endif /* _C4_YML_DETAIL_STACK_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/parse.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_PARSE_HPP_
+#define _C4_YML_PARSE_HPP_
+
+#ifndef _C4_YML_TREE_HPP_
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+#endif
+
+#ifndef _C4_YML_NODE_HPP_
+// amalgamate: removed
include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +#endif + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//#include "c4/yml/detail/stack.hpp" +#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) +#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_STACK_HPP_ */ + +#endif + +//included above: +//#include + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#endif + +namespace c4 { +namespace yml { + +struct RYML_EXPORT ParserOptions +{ +private: + + typedef enum : uint32_t { + LOCATIONS = (1 << 0), + DEFAULTS = 0, + } Flags_e; + + uint32_t flags = DEFAULTS; +public: + ParserOptions() = default; + + /** @name source location tracking */ + /** @{ */ + + /** enable/disable source location tracking */ + ParserOptions& locations(bool enabled) + { + if(enabled) + flags |= LOCATIONS; + else + flags &= ~LOCATIONS; + return *this; + } + bool locations() const { return (flags & LOCATIONS) != 0u; } + + /** @} */ +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT Parser +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Parser(Callbacks const& cb, ParserOptions opts={}); + Parser(ParserOptions opts={}) : Parser(get_callbacks(), opts) {} + ~Parser(); + + Parser(Parser &&); + Parser(Parser const&); + Parser& operator=(Parser &&); + Parser& operator=(Parser const&); + + /** @} */ + +public: + + /** @name modifiers */ + /** @{ */ + + /** Reserve a certain capacity for the parsing stack. + * This should be larger than the expected depth of the parsed + * YAML tree. + * + * The parsing stack is the only (potential) heap memory used by + * the parser. + * + * If the requested capacity is below the default + * stack size of 16, the memory is used directly in the parser + * object; otherwise it will be allocated from the heap. + * + * @note this reserves memory only for the parser itself; all the + * allocations for the parsed tree will go through the tree's + * allocator. + * + * @note the tree and the arena can (and should) also be reserved. */ + void reserve_stack(size_t capacity) + { + m_stack.reserve(capacity); + } + + /** Reserve a certain capacity for the array used to track node + * locations in the source buffer. */ + void reserve_locations(size_t num_source_lines) + { + _resize_locations(num_source_lines); + } + + /** Reserve a certain capacity for the character arena used to + * filter scalars. */ + void reserve_filter_arena(size_t num_characters) + { + _resize_filter_arena(num_characters); + } + + /** @} */ + +public: + + /** @name getters and modifiers */ + /** @{ */ + + /** Get the current callbacks in the parser. */ + Callbacks callbacks() const { return m_stack.m_callbacks; } + + /** Get the name of the latest file parsed by this object. */ + csubstr filename() const { return m_file; } + + /** Get the latest YAML buffer parsed by this object. 
*/ + csubstr source() const { return m_buf; } + + size_t stack_capacity() const { return m_stack.capacity(); } + size_t locations_capacity() const { return m_newline_offsets_capacity; } + size_t filter_arena_capacity() const { return m_filter_arena.len; } + + ParserOptions const& options() const { return m_options; } + + /** @} */ + +public: + + /** @name parse_in_place */ + /** @{ */ + + /** Create a new tree and parse into its root. + * The tree is created with the callbacks currently in the parser. */ + Tree parse_in_place(csubstr filename, substr src) + { + Tree t(callbacks()); + t.reserve(_estimate_capacity(src)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, Tree *t) + { + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id); + // ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, NodeRef node) + { + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); } + + /** @} */ + +public: + + /** @name parse_in_arena: copy the YAML source buffer to the + * tree's arena, then parse the copy in situ + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). + * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ + /** @{ */ + + // READ THE NOTE ABOVE! + #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a linker error." 
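+    // [editor's note] Illustrative usage sketch, not upstream code; `parser` and `buf`
+    // are placeholder names. It shows the intended split between the two overload families:
+    //
+    //     ryml::Parser parser;
+    //     // read-only source: copied to the tree's arena first, then parsed there
+    //     ryml::Tree t1 = parser.parse_in_arena("doc.yml", ryml::csubstr("{a: 1, b: [2, 3]}"));
+    //     // mutable source: parsed in place with no copy; the buffer must outlive the tree
+    //     char buf[] = "{a: 1, b: [2, 3]}";
+    //     ryml::Tree t2 = parser.parse_in_place("doc.yml", ryml::substr(buf));
+    //
+    // Passing a (mutable) substr to parse_in_arena() is deliberately a linker error (see the
+    // note above); assign it to a csubstr first if copying into the arena is really intended.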
+ RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node); + + /** Create a new tree and parse into its root. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + Tree parse_in_arena(csubstr filename, csubstr csrc) + { + Tree t(callbacks()); + substr src = t.copy_to_arena(csrc); + t.reserve(_estimate_capacity(csrc)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into a specific node in an existing tree. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, node_id); + } + + /** Parse into a specific node in an existing tree. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node) + { + substr src = node.tree()->copy_to_arena(csrc); + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); } + + /** @} */ + +public: + + /** @name locations */ + /** @{ */ + + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(Tree const& tree, size_t node_id) const; + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(ConstNodeRef node) const; + /** Get the string starting at a particular location, to the end + * of the parsed source buffer. */ + csubstr location_contents(Location const& loc) const; + /** Given a pointer to a buffer position, get the location. @p val + * must be pointing to somewhere in the source buffer that was + * last parsed by this object. 
*/ + Location val_location(const char *val) const; + + /** @} */ + +private: + + typedef enum { + BLOCK_LITERAL, //!< keep newlines (|) + BLOCK_FOLD //!< replace newline with single space (>) + } BlockStyle_e; + + typedef enum { + CHOMP_CLIP, //!< single newline at end (default) + CHOMP_STRIP, //!< no newline at end (-) + CHOMP_KEEP //!< all newlines from end (+) + } BlockChomp_e; + +private: + + using flag_t = int; + + static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } + + void _reset(); + + bool _finished_file() const; + bool _finished_line() const; + + csubstr _peek_next_line(size_t pos=npos) const; + bool _advance_to_peeked(); + void _scan_line(); + + csubstr _slurp_doc_scalar(); + + /** + * @param [out] quoted + * Will only be written to if this method returns true. + * Will be set to true if the scanned scalar was quoted, by '', "", > or |. + */ + bool _scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + bool _scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + + csubstr _scan_comment(); + csubstr _scan_squot_scalar(); + csubstr _scan_dquot_scalar(); + csubstr _scan_block(); + substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation); + substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line); + substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); + csubstr _scan_to_next_nonempty_line(size_t indentation); + csubstr _extend_scanned_scalar(csubstr currscalar); + + csubstr _filter_squot_scalar(const substr s); + csubstr _filter_dquot_scalar(substr s); + csubstr _filter_plain_scalar(substr s, size_t indentation); + csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); + template + bool _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation); + template + void _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos); + bool _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp); + + void _handle_finished_file(); + void _handle_line(); + + bool _handle_indentation(); + + bool _handle_unk(); + bool _handle_map_flow(); + bool _handle_map_blck(); + bool _handle_seq_flow(); + bool _handle_seq_blck(); + bool _handle_top(); + bool _handle_types(); + bool _handle_key_anchors_and_refs(); + bool _handle_val_anchors_and_refs(); + void _move_val_tag_to_key_tag(); + void _move_key_tag_to_val_tag(); + void _move_key_tag2_to_key_tag(); + void _move_val_anchor_to_key_anchor(); + void _move_key_anchor_to_val_anchor(); + + void _push_level(bool explicit_flow_chars = false); + void _pop_level(); + + void _start_unk(bool as_child=true); + + void _start_map(bool as_child=true); + void _start_map_unk(bool as_child); + void _stop_map(); + + void _start_seq(bool as_child=true); + void _stop_seq(); + + void _start_seqimap(); + void _stop_seqimap(); + + void _start_doc(bool as_child=true); + void _stop_doc(); + void _start_new_doc(csubstr rem); + void _end_stream(); + + NodeData* _append_val(csubstr val, flag_t quoted=false); + NodeData* _append_key_val(csubstr val, flag_t val_quoted=false); + bool _rval_dash_start_or_continue_seq(); + + void 
_store_scalar(csubstr s, flag_t is_quoted); + csubstr _consume_scalar(); + void _move_scalar_from_top(); + + inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({nullptr, size_t(0)}); } + inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({nullptr, size_t(0)}); } + inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({nullptr, size_t(0)}, false); } + + void _set_indentation(size_t behind); + void _save_indentation(size_t behind=0); + bool _maybe_set_indentation_from_anchor_or_tag(); + + void _write_key_anchor(size_t node_id); + void _write_val_anchor(size_t node_id); + + void _handle_directive(csubstr directive); + + void _skipchars(char c); + template + void _skipchars(const char (&chars)[N]); + +private: + + static size_t _count_nlines(csubstr src); + +private: + + typedef enum : flag_t { + RTOP = 0x01 << 0, ///< reading at top level + RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq + RMAP = 0x01 << 2, ///< reading a map + RSEQ = 0x01 << 3, ///< reading a seq + FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} + QMRK = 0x01 << 5, ///< reading an explicit key (`? key`) + RKEY = 0x01 << 6, ///< reading a scalar as key + RVAL = 0x01 << 7, ///< reading a scalar as val + RNXT = 0x01 << 8, ///< read next val or keyval + SSCL = 0x01 << 9, ///< there's a stored scalar + QSCL = 0x01 << 10, ///< stored scalar was quoted + RSET = 0x01 << 11, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html + NDOC = 0x01 << 12, ///< no document mode. a document has ended and another has not started yet. + //! reading an implicit map nested in an explicit seq. + //! eg, {key: [key2: value2, key3: value3]} + //! 
is parsed as {key: [{key2: value2}, {key3: value3}]} + RSEQIMAP = 0x01 << 13, + } State_e; + + struct LineContents + { + csubstr full; ///< the full line, including newlines on the right + csubstr stripped; ///< the stripped line, excluding newlines on the right + csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character + size_t indentation; ///< the number of spaces on the beginning of the line + + LineContents() : full(), stripped(), rem(), indentation() {} + + void reset_with_next_line(csubstr buf, size_t pos); + + void reset(csubstr full_, csubstr stripped_) + { + full = full_; + stripped = stripped_; + rem = stripped_; + // find the first column where the character is not a space + indentation = full.first_not_of(' '); + } + + size_t current_col() const + { + return current_col(rem); + } + + size_t current_col(csubstr s) const + { + RYML_ASSERT(s.str >= full.str); + RYML_ASSERT(full.is_super(s)); + size_t col = static_cast(s.str - full.str); + return col; + } + }; + + struct State + { + flag_t flags; + size_t level; + size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes + csubstr scalar; + size_t scalar_col; // the column where the scalar (or its quotes) begin + + Location pos; + LineContents line_contents; + size_t indref; + + State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} + + void reset(const char *file, size_t node_id_) + { + flags = RUNK|RTOP; + level = 0; + pos.name = to_csubstr(file); + pos.offset = 0; + pos.line = 1; + pos.col = 1; + node_id = node_id_; + scalar_col = 0; + scalar.clear(); + indref = 0; + } + }; + + void _line_progressed(size_t ahead); + void _line_ended(); + void _line_ended_undo(); + + void _prepare_pop() + { + RYML_ASSERT(m_stack.size() > 1); + State const& curr = m_stack.top(); + State & next = m_stack.top(1); + next.pos = curr.pos; + next.line_contents = curr.line_contents; + next.scalar = curr.scalar; + } + + inline bool _at_line_begin() const + { + return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); + } + inline bool _at_line_end() const + { + csubstr r = m_state->line_contents.rem; + return r.empty() || r.begins_with(' ', r.len); + } + inline bool _token_is_from_this_line(csubstr token) const + { + return token.is_sub(m_state->line_contents.full); + } + + inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } + inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } + inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } + + inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; } + inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; } + inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; } + + static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; } + static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; } + static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; } + + inline void set_flags(flag_t f) { set_flags(f, m_state); } + inline void add_flags(flag_t on) { add_flags(on, m_state); } + inline void addrem_flags(flag_t on, flag_t off) { addrem_flags(on, off, m_state); } + inline void rem_flags(flag_t off) { rem_flags(off, m_state); } + + void set_flags(flag_t f, State * s); + void add_flags(flag_t on, State * s); + void addrem_flags(flag_t on, flag_t off, State * s); + void 
rem_flags(flag_t off, State * s); + + void _resize_filter_arena(size_t num_characters); + void _grow_filter_arena(size_t num_characters); + substr _finish_filter_arena(substr dst, size_t pos); + + void _prepare_locations(); + void _resize_locations(size_t sz); + bool _locations_dirty() const; + + bool _location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const; + bool _location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const; + +private: + + void _free(); + void _clr(); + void _cp(Parser const* that); + void _mv(Parser *that); + +#ifdef RYML_DBG + template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; +#endif + template void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; + template void _fmt_msg(DumpFn &&dumpfn) const; + static csubstr _prfl(substr buf, flag_t v); + +private: + + ParserOptions m_options; + + csubstr m_file; + substr m_buf; + + size_t m_root_id; + Tree * m_tree; + + detail::stack m_stack; + State * m_state; + + size_t m_key_tag_indentation; + size_t m_key_tag2_indentation; + csubstr m_key_tag; + csubstr m_key_tag2; + size_t m_val_tag_indentation; + csubstr m_val_tag; + + bool m_key_anchor_was_before; + size_t m_key_anchor_indentation; + csubstr m_key_anchor; + size_t m_val_anchor_indentation; + csubstr m_val_anchor; + + substr m_filter_arena; + + size_t *m_newline_offsets; + size_t m_newline_offsets_size; + size_t m_newline_offsets_capacity; + csubstr m_newline_offsets_buf; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @name parse_in_place + * + * @desc parse a mutable YAML source buffer. + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_place() */ +/** @{ */ + +inline Tree parse_in_place( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } //!< parse in-situ a modifiable YAML source buffer. +inline Tree parse_in_place(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. 
+inline void parse_in_place( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @name parse_in_arena + * @desc parse a read-only YAML source buffer, copying it first to the tree's arena. + * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_arena(). + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). + * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ +/** @{ */ + +/* READ THE NOTE ABOVE! 
*/ +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); + +inline Tree parse_in_arena( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
+RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + +/** @} */ + +} // namespace yml +} // namespace c4 + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* _C4_YML_PARSE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/map.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_MAP_HPP_ +#define _C4_YML_STD_MAP_HPP_ + +/** @file map.hpp write/read std::map to/from a YAML tree. */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +#include + +namespace c4 { +namespace yml { + +// std::map requires child nodes in the data +// tree hierarchy (a MAP node in ryml parlance). +// So it should be serialized via write()/read(). 
+
+template<class K, class V>
+void write(c4::yml::NodeRef *n, std::map<K, V> const& m)
+{
+    *n |= c4::yml::MAP;
+    for(auto const& C4_RESTRICT p : m)
+    {
+        auto ch = n->append_child();
+        ch << c4::yml::key(p.first);
+        ch << p.second;
+    }
+}
+
+template<class K, class V>
+bool read(c4::yml::ConstNodeRef const& n, std::map<K, V> * m)
+{
+    K k{};
+    V v{};
+    for(auto const& C4_RESTRICT ch : n)
+    {
+        ch >> c4::yml::key(k);
+        ch >> v;
+        m->emplace(std::make_pair(std::move(k), std::move(v)));
+    }
+    return true;
+}
+
+} // namespace yml
+} // namespace c4
+
+#endif // _C4_YML_STD_MAP_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/string.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef C4_YML_STD_STRING_HPP_
+#define C4_YML_STD_STRING_HPP_
+
+/** @file string.hpp substring conversions for/from std::string */
+
+// everything we need is implemented here:
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp
+//#include <c4/std/string.hpp>
+#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_)
+#error "amalgamate: file c4/std/string.hpp must have been included at this point"
+#endif /* C4_STD_STRING_HPP_ */
+
+
+#endif // C4_YML_STD_STRING_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/std/vector.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_STD_VECTOR_HPP_
+#define _C4_YML_STD_VECTOR_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp
+//#include <c4/std/vector.hpp>
+#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_)
+#error "amalgamate: file c4/std/vector.hpp must have been included at this point"
+#endif /* C4_STD_VECTOR_HPP_ */
+
+//included above:
+//#include <vector>
+
+namespace c4 {
+namespace yml {
+
+// vector is a sequence-like type, and it requires child nodes
+// in the data tree hierarchy (a SEQ node in ryml parlance).
+// So it should be serialized via write()/read().
+
+
+template<class V, class Alloc>
+void write(c4::yml::NodeRef *n, std::vector<V, Alloc> const& vec)
+{
+    *n |= c4::yml::SEQ;
+    for(auto const& v : vec)
+        n->append_child() << v;
+}
+
+template<class V, class Alloc>
+bool read(c4::yml::ConstNodeRef const& n, std::vector<V, Alloc> *vec)
+{
+    vec->resize(n.num_children());
+    size_t pos = 0;
+    for(auto const ch : n)
+        ch >> (*vec)[pos++];
+    return true;
+}
+
+/** specialization: std::vector<bool> uses std::vector<bool>::reference as
+ * the return value of its operator[].
*/ +template +bool read(c4::yml::ConstNodeRef const& n, std::vector *vec) +{ + vec->resize(n.num_children()); + size_t pos = 0; + bool tmp; + for(auto const ch : n) + { + ch >> tmp; + (*vec)[pos++] = tmp; + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_VECTOR_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/std.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_STD_HPP_ +#define _C4_YML_STD_STD_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp +//#include "c4/yml/std/string.hpp" +#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_) +#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point" +#endif /* C4_YML_STD_STRING_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp +//#include "c4/yml/std/vector.hpp" +#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_) +#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point" +#endif /* C4_YML_STD_VECTOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//#include "c4/yml/std/map.hpp" +#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_) +#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point" +#endif /* C4_YML_STD_MAP_HPP_ */ + + +#endif // _C4_YML_STD_STD_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/common.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + + +#ifndef RYML_NO_DEFAULT_CALLBACKS +//included above: +//# include +//included above: +//# include +#endif // RYML_NO_DEFAULT_CALLBACKS + +namespace c4 { +namespace yml { + +namespace { +Callbacks s_default_callbacks; +} // anon namespace + +#ifndef RYML_NO_DEFAULT_CALLBACKS +void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) +{ + if(!f) + f = stderr; + if(loc) + { + if(!loc.name.empty()) + { + fwrite(loc.name.str, 1, loc.name.len, f); + fputc(':', f); + } + fprintf(f, "%zu:", loc.line); + if(loc.col) + fprintf(f, "%zu:", loc.col); + if(loc.offset) + fprintf(f, " (%zuB):", loc.offset); + } + fprintf(f, "%.*s\n", (int)length, msg); + fflush(f); +} + +void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) +{ + 
report_error_impl(msg, length, loc, nullptr); + ::abort(); +} + +void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/) +{ + void *mem = ::malloc(length); + if(mem == nullptr) + { + const char msg[] = "could not allocate memory"; + error_impl(msg, sizeof(msg)-1, {}, nullptr); + } + return mem; +} + +void free_impl(void *mem, size_t /*length*/, void * /*user_data*/) +{ + ::free(mem); +} +#endif // RYML_NO_DEFAULT_CALLBACKS + + + +Callbacks::Callbacks() + : + m_user_data(nullptr), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(allocate_impl), + m_free(free_impl), + m_error(error_impl) + #else + m_allocate(nullptr), + m_free(nullptr), + m_error(nullptr) + #endif +{ +} + +Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_) + : + m_user_data(user_data), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(alloc_ ? alloc_ : allocate_impl), + m_free(free_ ? free_ : free_impl), + m_error(error_ ? error_ : error_impl) + #else + m_allocate(alloc_), + m_free(free_), + m_error(error_) + #endif +{ + C4_CHECK(m_allocate); + C4_CHECK(m_free); + C4_CHECK(m_error); +} + + +void set_callbacks(Callbacks const& c) +{ + s_default_callbacks = c; +} + +Callbacks const& get_callbacks() +{ + return s_default_callbacks; +} + +void reset_callbacks() +{ + set_callbacks(Callbacks()); +} + +void error(const char *msg, size_t msg_len, Location loc) +{ + s_default_callbacks.m_error(msg, msg_len, loc, s_default_callbacks.m_user_data); +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//#include "c4/yml/detail/stack.hpp" +#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) +#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_STACK_HPP_ */ + + + +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wtype-limits") +C4_SUPPRESS_WARNING_MSVC_WITH_PUSH(4296/*expression 
is always 'boolean_value'*/) + +namespace c4 { +namespace yml { + + +csubstr normalize_tag(csubstr tag) +{ + YamlTag_e t = to_tag(tag); + if(t != TAG_NONE) + return from_tag(t); + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("<!")) + return tag; + return tag; +} + +csubstr normalize_tag_long(csubstr tag) +{ + YamlTag_e t = to_tag(tag); + if(t != TAG_NONE) + return from_tag_long(t); + if(tag.begins_with("!<")) + tag = tag.sub(1); + if(tag.begins_with("<!")) + return tag; + return tag; +}
+ +YamlTag_e to_tag(csubstr tag) +{ + if(tag.begins_with("!<")) + tag = tag.sub(2); + if(tag.begins_with("!!")) + tag = tag.sub(2); + else if(tag.begins_with('!')) + return TAG_NONE; + else if(tag.begins_with("tag:yaml.org,2002:")) + tag = tag.sub(18); // skip the "tag:yaml.org,2002:" prefix + else if(tag.begins_with("<tag:yaml.org,2002:")) + { + tag = tag.sub(19); // skip the "<tag:yaml.org,2002:" prefix + if(tag.ends_with('>')) + tag = tag.offs(0, 1); + } + if(tag == "map") + return TAG_MAP; + else if(tag == "omap") + return TAG_OMAP; + else if(tag == "pairs") + return TAG_PAIRS; + else if(tag == "set") + return TAG_SET; + else if(tag == "seq") + return TAG_SEQ; + else if(tag == "binary") + return TAG_BINARY; + else if(tag == "bool") + return TAG_BOOL; + else if(tag == "float") + return TAG_FLOAT; + else if(tag == "int") + return TAG_INT; + else if(tag == "merge") + return TAG_MERGE; + else if(tag == "null") + return TAG_NULL; + else if(tag == "str") + return TAG_STR; + else if(tag == "timestamp") + return TAG_TIMESTAMP; + else if(tag == "value") + return TAG_VALUE; + else if(tag == "yaml") + return TAG_YAML; + return TAG_NONE; +}
+ +csubstr from_tag_long(YamlTag_e tag) +{ + switch(tag) + { + case TAG_MAP: + return {"<tag:yaml.org,2002:map>"}; + case TAG_OMAP: + return {"<tag:yaml.org,2002:omap>"}; + case TAG_PAIRS: + return {"<tag:yaml.org,2002:pairs>"}; + case TAG_SET: + return {"<tag:yaml.org,2002:set>"}; + case TAG_SEQ: + return {"<tag:yaml.org,2002:seq>"}; + case TAG_BINARY: + return {"<tag:yaml.org,2002:binary>"}; + case TAG_BOOL: + return {"<tag:yaml.org,2002:bool>"}; + case TAG_FLOAT: + return {"<tag:yaml.org,2002:float>"}; + case TAG_INT: + return {"<tag:yaml.org,2002:int>"}; + case TAG_MERGE: + return {"<tag:yaml.org,2002:merge>"}; + case TAG_NULL: + return {"<tag:yaml.org,2002:null>"}; + case TAG_STR: + return {"<tag:yaml.org,2002:str>"}; + case TAG_TIMESTAMP: + return {"<tag:yaml.org,2002:timestamp>"}; + case TAG_VALUE: + return {"<tag:yaml.org,2002:value>"}; + case TAG_YAML: + return {"<tag:yaml.org,2002:yaml>"}; + case TAG_NONE: + return {""}; + } + return {""}; +}
+ +csubstr from_tag(YamlTag_e tag) +{ + switch(tag) + { + case TAG_MAP: + return {"!!map"}; + case TAG_OMAP: + return {"!!omap"}; + case TAG_PAIRS: + return {"!!pairs"}; + case TAG_SET: + return {"!!set"}; + case TAG_SEQ: + return {"!!seq"}; + case TAG_BINARY: + return {"!!binary"}; + case TAG_BOOL: + return {"!!bool"}; + case TAG_FLOAT: + return {"!!float"}; + case TAG_INT: + return {"!!int"}; + case TAG_MERGE: + return {"!!merge"}; + case TAG_NULL: + return {"!!null"}; + case TAG_STR: + return {"!!str"}; + case TAG_TIMESTAMP: + return {"!!timestamp"}; + case TAG_VALUE: + return {"!!value"}; + case TAG_YAML: + return {"!!yaml"}; + case TAG_NONE: + return {""}; + } + return {""}; +}
+ + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +const char* NodeType::type_str(NodeType_e ty) +{ + switch(ty & _TYMASK) + { + case KEYVAL: + return "KEYVAL"; + case KEY: + return "KEY"; + case VAL: + return "VAL"; + case MAP: + return "MAP"; + case SEQ: + return "SEQ"; + case KEYMAP: + return "KEYMAP"; + case KEYSEQ: + return "KEYSEQ"; + case DOCSEQ: + return "DOCSEQ"; + case DOCMAP: + return "DOCMAP"; + case DOCVAL: + return "DOCVAL"; + case DOC: + return "DOC"; + case STREAM: + return "STREAM"; + case NOTYPE: + return "NOTYPE"; + default: + if((ty & KEYVAL) == KEYVAL) + return "KEYVAL***"; + if((ty & KEYMAP) == KEYMAP) + return "KEYMAP***"; + if((ty & KEYSEQ) == KEYSEQ) + return "KEYSEQ***"; + if((ty & DOCSEQ) == DOCSEQ) + return "DOCSEQ***"; + if((ty & DOCMAP) == DOCMAP) + return "DOCMAP***"; + if((ty & DOCVAL) == DOCVAL) + return "DOCVAL***"; + if(ty & KEY) + return "KEY***"; + if(ty & VAL) + return "VAL***"; + if(ty & MAP) + return "MAP***"; + if(ty & SEQ) + return "SEQ***"; + if(ty & DOC) + return "DOC***"; + return "(unk)"; + } +}
+ + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +NodeRef Tree::rootref() +{ + return NodeRef(this, root_id()); +} +ConstNodeRef Tree::rootref() const +{ + return ConstNodeRef(this, root_id()); +} + +ConstNodeRef Tree::crootref() +{ + return ConstNodeRef(this, root_id()); +} +ConstNodeRef Tree::crootref() const +{ + return ConstNodeRef(this, root_id()); +} + +NodeRef Tree::ref(size_t id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return NodeRef(this, id); +} +ConstNodeRef Tree::ref(size_t id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +}
+ +ConstNodeRef Tree::cref(size_t id) +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} +ConstNodeRef Tree::cref(size_t id) const +{ + _RYML_CB_ASSERT(m_callbacks, id != NONE && id >= 0 && id < m_size); + return ConstNodeRef(this, id); +} + +NodeRef Tree::operator[] (csubstr key) +{ + return rootref()[key]; +} +ConstNodeRef Tree::operator[] (csubstr key) const +{ + return rootref()[key]; +} + +NodeRef Tree::operator[] (size_t i) +{ + return rootref()[i]; +} +ConstNodeRef Tree::operator[] (size_t i) const +{ + return rootref()[i]; +} + +NodeRef Tree::docref(size_t i) +{ + return ref(doc(i)); +} +ConstNodeRef Tree::docref(size_t i) const +{ + return cref(doc(i)); +} + + +//----------------------------------------------------------------------------- +Tree::Tree(Callbacks const& cb) + : m_buf(nullptr) + , m_cap(0) + , m_size(0) + , m_free_head(NONE) + , m_free_tail(NONE) + , m_arena() + , m_arena_pos(0) + , m_callbacks(cb) +{ +} + +Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb) + : Tree(cb) +{ + reserve(node_capacity); + reserve_arena(arena_capacity); +} + +Tree::~Tree() +{ + _free(); +} + + +Tree::Tree(Tree const& that) noexcept : Tree(that.m_callbacks) +{ + _copy(that); +} + +Tree& Tree::operator= (Tree const& that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _copy(that); + return *this; +} + +Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) +{ + _move(that); +} + +Tree& Tree::operator= (Tree && that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _move(that); + return *this; +} + +void Tree::_free() +{ + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap > 0); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + if(m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0); + _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); + } + _clear(); +} + + +C4_SUPPRESS_WARNING_GCC_PUSH +#if defined(__GNUC__) && __GNUC__>= 8 + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +void Tree::_clear() +{ + m_buf = nullptr; + m_cap = 0; + m_size = 0; + m_free_head = 0; + m_free_tail = 0; + m_arena = {}; + m_arena_pos = 0; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_copy(Tree const& that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf); + memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData)); + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena_pos = that.m_arena_pos; + m_arena = that.m_arena; + if(that.m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0); + substr arena; + arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str); + arena.len = that.m_arena.len; + _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena + m_arena = arena; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; +} + +void Tree::_move(Tree & that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + 
_RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = that.m_buf; + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena = that.m_arena; + m_arena_pos = that.m_arena_pos; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; + that._clear(); +} + +void Tree::_relocate(substr next_arena) +{ + _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); + _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); + memcpy(next_arena.str, m_arena.str, m_arena_pos); + for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) + { + if(in_arena(n->m_key.scalar)) + n->m_key.scalar = _relocated(n->m_key.scalar, next_arena); + if(in_arena(n->m_key.tag)) + n->m_key.tag = _relocated(n->m_key.tag, next_arena); + if(in_arena(n->m_key.anchor)) + n->m_key.anchor = _relocated(n->m_key.anchor, next_arena); + if(in_arena(n->m_val.scalar)) + n->m_val.scalar = _relocated(n->m_val.scalar, next_arena); + if(in_arena(n->m_val.tag)) + n->m_val.tag = _relocated(n->m_val.tag, next_arena); + if(in_arena(n->m_val.anchor)) + n->m_val.anchor = _relocated(n->m_val.anchor, next_arena); + } + for(TagDirective &C4_RESTRICT td : m_tag_directives) + { + if(in_arena(td.prefix)) + td.prefix = _relocated(td.prefix, next_arena); + if(in_arena(td.handle)) + td.handle = _relocated(td.handle, next_arena); + } +} + + +//----------------------------------------------------------------------------- +void Tree::reserve(size_t cap) +{ + if(cap > m_cap) + { + NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf); + if(m_buf) + { + memcpy(buf, m_buf, m_cap * sizeof(NodeData)); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + size_t first = m_cap, del = cap - m_cap; + m_cap = cap; + m_buf = buf; + _clear_range(first, del); + if(m_free_head != NONE) + { + _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr); + _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE); + m_buf[m_free_tail].m_next_sibling = first; + m_buf[first].m_prev_sibling = m_free_tail; + m_free_tail = cap-1; + } + else + { + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE); + m_free_head = first; + m_free_tail = cap-1; + } + _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap)); + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap)); + + if( ! 
m_size) + _claim_root(); + } +} + + +//----------------------------------------------------------------------------- +void Tree::clear() +{ + _clear_range(0, m_cap); + m_size = 0; + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap >= 0); + m_free_head = 0; + m_free_tail = m_cap-1; + _claim_root(); + } + else + { + m_free_head = NONE; + m_free_tail = NONE; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_claim_root() +{ + size_t r = _claim(); + _RYML_CB_ASSERT(m_callbacks, r == 0); + _set_hierarchy(r, NONE, NONE); +} + + +//----------------------------------------------------------------------------- +void Tree::_clear_range(size_t first, size_t num) +{ + if(num == 0) + return; // prevent overflow when subtracting + _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); + memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this + for(size_t i = first, e = first + num; i < e; ++i) + { + _clear(i); + NodeData *n = m_buf + i; + n->m_prev_sibling = i - 1; + n->m_next_sibling = i + 1; + } + m_buf[first + num - 1].m_next_sibling = NONE; +} + +C4_SUPPRESS_WARNING_GCC_POP + + +//----------------------------------------------------------------------------- +void Tree::_release(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + _rem_hierarchy(i); + _free_list_add(i); + _clear(i); + + --m_size; +} + +//----------------------------------------------------------------------------- +// add to the front of the free list +void Tree::_free_list_add(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + NodeData &C4_RESTRICT w = m_buf[i]; + + w.m_parent = NONE; + w.m_next_sibling = m_free_head; + w.m_prev_sibling = NONE; + if(m_free_head != NONE) + m_buf[m_free_head].m_prev_sibling = i; + m_free_head = i; + if(m_free_tail == NONE) + m_free_tail = m_free_head; +} + +void Tree::_free_list_rem(size_t i) +{ + if(m_free_head == i) + m_free_head = _p(i)->m_next_sibling; + _rem_hierarchy(i); +} + +//----------------------------------------------------------------------------- +size_t Tree::_claim() +{ + if(m_free_head == NONE || m_buf == nullptr) + { + size_t sz = 2 * m_cap; + sz = sz ? 
sz : 16; + reserve(sz); + _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); + } + + _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); + _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); + + size_t ichild = m_free_head; + NodeData *child = m_buf + ichild; + + ++m_size; + m_free_head = child->m_next_sibling; + if(m_free_head == NONE) + { + m_free_tail = NONE; + _RYML_CB_ASSERT(m_callbacks, m_size == m_cap); + } + + _clear(ichild); + + return ichild; +} + +//----------------------------------------------------------------------------- + +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_CLANG_PUSH +C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") +#if defined(__GNUC__) && (__GNUC__ >= 6) +C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") +#endif + +void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) +{ + _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); + + NodeData *C4_RESTRICT child = get(ichild); + + child->m_parent = iparent; + child->m_prev_sibling = NONE; + child->m_next_sibling = NONE; + + if(iparent == NONE) + { + _RYML_CB_ASSERT(m_callbacks, ichild == 0); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE); + } + + if(iparent == NONE) + return; + + size_t inext_sibling = iprev_sibling != NONE ? next_sibling(iprev_sibling) : first_child(iparent); + NodeData *C4_RESTRICT parent = get(iparent); + NodeData *C4_RESTRICT psib = get(iprev_sibling); + NodeData *C4_RESTRICT nsib = get(inext_sibling); + + if(psib) + { + _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib)); + child->m_prev_sibling = id(psib); + psib->m_next_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE); + } + + if(nsib) + { + _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib)); + child->m_next_sibling = id(nsib); + nsib->m_prev_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE); + } + + if(parent->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE); + parent->m_first_child = id(child); + parent->m_last_child = id(child); + } + else + { + if(child->m_next_sibling == parent->m_first_child) + parent->m_first_child = id(child); + + if(child->m_prev_sibling == parent->m_last_child) + parent->m_last_child = id(child); + } +} + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_CLANG_POP + + +//----------------------------------------------------------------------------- +void Tree::_rem_hierarchy(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + NodeData &C4_RESTRICT w = m_buf[i]; + + // remove from the parent + if(w.m_parent != NONE) + { + NodeData &C4_RESTRICT p = m_buf[w.m_parent]; + if(p.m_first_child == i) + { + p.m_first_child = w.m_next_sibling; + } + if(p.m_last_child == i) + { + p.m_last_child = w.m_prev_sibling; + } + } + + // remove from the used list + if(w.m_prev_sibling != NONE) + { + NodeData *C4_RESTRICT prev = get(w.m_prev_sibling); + prev->m_next_sibling = w.m_next_sibling; + } + if(w.m_next_sibling != NONE) + { + NodeData *C4_RESTRICT next = get(w.m_next_sibling); + next->m_prev_sibling = w.m_prev_sibling; + } +} + +//----------------------------------------------------------------------------- +void Tree::reorder() +{ + size_t r = root_id(); + _do_reorder(&r, 0); +} + 
+//----------------------------------------------------------------------------- +size_t Tree::_do_reorder(size_t *node, size_t count) +{ + // swap this node if it's not in place + if(*node != count) + { + _swap(*node, count); + *node = count; + } + ++count; // bump the count from this node + + // now descend in the hierarchy + for(size_t i = first_child(*node); i != NONE; i = next_sibling(i)) + { + // this child may have been relocated to a different index, + // so get an updated version + count = _do_reorder(&i, count); + } + return count; +} + +//----------------------------------------------------------------------------- +void Tree::_swap(size_t n_, size_t m_) +{ + _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); + _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); + NodeType tn = type(n_); + NodeType tm = type(m_); + if(tn != NOTYPE && tm != NOTYPE) + { + _swap_props(n_, m_); + _swap_hierarchy(n_, m_); + } + else if(tn == NOTYPE && tm != NOTYPE) + { + _copy_props(n_, m_); + _free_list_rem(n_); + _copy_hierarchy(n_, m_); + _clear(m_); + _free_list_add(m_); + } + else if(tn != NOTYPE && tm == NOTYPE) + { + _copy_props(m_, n_); + _free_list_rem(m_); + _copy_hierarchy(m_, n_); + _clear(n_); + _free_list_add(n_); + } + else + { + C4_NEVER_REACH(); + } +} + +//----------------------------------------------------------------------------- +void Tree::_swap_hierarchy(size_t ia, size_t ib) +{ + if(ia == ib) return; + + for(size_t i = first_child(ia); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ib; + } + + for(size_t i = first_child(ib); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ia; + } + + auto & C4_RESTRICT a = *_p(ia); + auto & C4_RESTRICT b = *_p(ib); + auto & C4_RESTRICT pa = *_p(a.m_parent); + auto & C4_RESTRICT pb = *_p(b.m_parent); + + if(&pa == &pb) + { + if((pa.m_first_child == ib && pa.m_last_child == ia) + || + (pa.m_first_child == ia && pa.m_last_child == ib)) + { + std::swap(pa.m_first_child, pa.m_last_child); + } + else + { + bool changed = false; + if(pa.m_first_child == ia) + { + pa.m_first_child = ib; + changed = true; + } + if(pa.m_last_child == ia) + { + pa.m_last_child = ib; + changed = true; + } + if(pb.m_first_child == ib && !changed) + { + pb.m_first_child = ia; + } + if(pb.m_last_child == ib && !changed) + { + pb.m_last_child = ia; + } + } + } + else + { + if(pa.m_first_child == ia) + pa.m_first_child = ib; + if(pa.m_last_child == ia) + pa.m_last_child = ib; + if(pb.m_first_child == ib) + pb.m_first_child = ia; + if(pb.m_last_child == ib) + pb.m_last_child = ia; + } + std::swap(a.m_first_child , b.m_first_child); + std::swap(a.m_last_child , b.m_last_child); + + if(a.m_prev_sibling != ib && b.m_prev_sibling != ia && + a.m_next_sibling != ib && b.m_next_sibling != ia) + { + if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib) + _p(a.m_prev_sibling)->m_next_sibling = ib; + if(a.m_next_sibling != NONE && a.m_next_sibling != ib) + _p(a.m_next_sibling)->m_prev_sibling = ib; + if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia) + _p(b.m_prev_sibling)->m_next_sibling = ia; + if(b.m_next_sibling != NONE && b.m_next_sibling != ia) + _p(b.m_next_sibling)->m_prev_sibling = ia; + std::swap(a.m_prev_sibling, b.m_prev_sibling); + std::swap(a.m_next_sibling, b.m_next_sibling); + } + else + { + if(a.m_next_sibling == ib) // n will go after m + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia); + if(a.m_prev_sibling != NONE) + { 
+ _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib); + _p(a.m_prev_sibling)->m_next_sibling = ib; + } + if(b.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); + _p(b.m_next_sibling)->m_prev_sibling = ia; + } + size_t ns = b.m_next_sibling; + b.m_prev_sibling = a.m_prev_sibling; + b.m_next_sibling = ia; + a.m_prev_sibling = ib; + a.m_next_sibling = ns; + } + else if(a.m_prev_sibling == ib) // m will go after n + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia); + if(b.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia); + _p(b.m_prev_sibling)->m_next_sibling = ia; + } + if(a.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); + _p(a.m_next_sibling)->m_prev_sibling = ib; + } + size_t ns = b.m_prev_sibling; + a.m_prev_sibling = b.m_prev_sibling; + a.m_next_sibling = ib; + b.m_prev_sibling = ia; + b.m_next_sibling = ns; + } + else + { + C4_NEVER_REACH(); + } + } + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib); + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib); + + if(a.m_parent != ib && b.m_parent != ia) + { + std::swap(a.m_parent, b.m_parent); + } + else + { + if(a.m_parent == ib && b.m_parent != ia) + { + a.m_parent = b.m_parent; + b.m_parent = ia; + } + else if(a.m_parent != ib && b.m_parent == ia) + { + b.m_parent = a.m_parent; + a.m_parent = ib; + } + else + { + C4_NEVER_REACH(); + } + } +} + +//----------------------------------------------------------------------------- +void Tree::_copy_hierarchy(size_t dst_, size_t src_) +{ + auto const& C4_RESTRICT src = *_p(src_); + auto & C4_RESTRICT dst = *_p(dst_); + auto & C4_RESTRICT prt = *_p(src.m_parent); + for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i)) + { + _p(i)->m_parent = dst_; + } + if(src.m_prev_sibling != NONE) + { + _p(src.m_prev_sibling)->m_next_sibling = dst_; + } + if(src.m_next_sibling != NONE) + { + _p(src.m_next_sibling)->m_prev_sibling = dst_; + } + if(prt.m_first_child == src_) + { + prt.m_first_child = dst_; + } + if(prt.m_last_child == src_) + { + prt.m_last_child = dst_; + } + dst.m_parent = src.m_parent; + dst.m_first_child = src.m_first_child; + dst.m_last_child = src.m_last_child; + dst.m_prev_sibling = src.m_prev_sibling; + dst.m_next_sibling = src.m_next_sibling; +} + +//----------------------------------------------------------------------------- +void Tree::_swap_props(size_t n_, size_t m_) +{ + NodeData &C4_RESTRICT n = *_p(n_); + NodeData &C4_RESTRICT m = *_p(m_); + std::swap(n.m_type, m.m_type); + std::swap(n.m_key, m.m_key); + std::swap(n.m_val, m.m_val); +} + +//----------------------------------------------------------------------------- +void Tree::move(size_t node, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, ! 
is_root(node)); + _RYML_CB_ASSERT(m_callbacks, (after == NONE) || (has_sibling(node, after) && has_sibling(after, node))); + + _rem_hierarchy(node); + _set_hierarchy(node, parent(node), after); +} + +//----------------------------------------------------------------------------- + +void Tree::move(size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, node != after); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != node); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); + + _rem_hierarchy(node); + _set_hierarchy(node, new_parent, after); +} + +size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != after); + + size_t dup = duplicate(src, node, new_parent, after); + src->remove(node); + return dup; +} + +void Tree::set_root_as_stream() +{ + size_t root = root_id(); + if(is_stream(root)) + return; + // don't use _add_flags() because it's checked and will fail + if(!has_children(root)) + { + if(is_val(root)) + { + _p(root)->m_type.add(SEQ); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _p(next_doc)->m_type.add(DOC); + _p(next_doc)->m_type.rem(SEQ); + } + _p(root)->m_type = STREAM; + return; + } + _RYML_CB_ASSERT(m_callbacks, !has_key(root)); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _add_flags(next_doc, DOC); + for(size_t prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) + { + if(ch == next_doc) + break; + move(ch, next_doc, prev); + prev = ch; + ch = next; + next = next_sibling(next); + } + _p(root)->m_type = STREAM; +} + + +//----------------------------------------------------------------------------- +void Tree::remove_children(size_t node) +{ + _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); + size_t ich = get(node)->m_first_child; + while(ich != NONE) + { + remove_children(ich); + _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); + size_t next = get(ich)->m_next_sibling; + _release(ich); + if(ich == get(node)->m_last_child) + break; + ich = next; + } +} + +bool Tree::change_type(size_t node, NodeType type) +{ + _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); + _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); + _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key())); + NodeData *d = _p(node); + if(type.is_map() && is_map(node)) + return false; + else if(type.is_seq() && is_seq(node)) + return false; + else if(type.is_val() && is_val(node)) + return false; + d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; + remove_children(node); + return true; +} + + +//----------------------------------------------------------------------------- +size_t Tree::duplicate(size_t node, size_t parent, size_t after) +{ + return duplicate(this, node, parent, after); +} + +size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, ! 
src->is_root(node)); + + size_t copy = _claim(); + + _copy_props(copy, src, node); + _set_hierarchy(copy, parent, after); + duplicate_children(src, node, copy, NONE); + + return copy; +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children(size_t node, size_t parent, size_t after) +{ + return duplicate_children(this, node, parent, after); +} + +size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + size_t prev = after; + for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + { + prev = duplicate(src, i, parent, prev); + } + + return prev; +} + +//----------------------------------------------------------------------------- +void Tree::duplicate_contents(size_t node, size_t where) +{ + duplicate_contents(this, node, where); +} + +void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, where != NONE); + _copy_props_wo_key(where, src, node); + duplicate_children(src, node, where, last_child(where)); +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after) +{ + return duplicate_children_no_rep(this, node, parent, after); +} + +size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + // don't loop using pointers as there may be a relocation + + // find the position where "after" is + size_t after_pos = NONE; + if(after != NONE) + { + for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) + { + if(i == after) + { + after_pos = icount; + break; + } + } + _RYML_CB_ASSERT(m_callbacks, after_pos != NONE); + } + + // for each child to be duplicated... + size_t prev = after; + for(size_t i = src->first_child(node), icount = 0; i != NONE; ++icount, i = src->next_sibling(i)) + { + if(is_seq(parent)) + { + prev = duplicate(i, parent, prev); + } + else + { + _RYML_CB_ASSERT(m_callbacks, is_map(parent)); + // does the parent already have a node with key equal to that of the current duplicate? + size_t rep = NONE, rep_pos = NONE; + for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) + { + if(key(j) == key(i)) + { + rep = j; + rep_pos = jcount; + break; + } + } + if(rep == NONE) // there is no repetition; just duplicate + { + prev = duplicate(src, i, parent, prev); + } + else // yes, there is a repetition + { + if(after_pos != NONE && rep_pos < after_pos) + { + // rep is located before the node which will be inserted, + // and will be overridden by the duplicate. So replace it. + remove(rep); + prev = duplicate(src, i, parent, prev); + } + else if(prev == NONE) + { + // first iteration with prev = after = NONE and repetition + prev = rep; + } + else if(rep != prev) + { + // rep is located after the node which will be inserted + // and overrides it. So move the rep into this node's place. 
+ move(rep, prev); + prev = rep; + } + } // there's a repetition + } + } + + return prev; +} + + +//----------------------------------------------------------------------------- + +void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + if(src_node == NONE) + src_node = src->root_id(); + if(dst_node == NONE) + dst_node = root_id(); + _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); + + if(src->has_val(src_node)) + { + if( ! has_val(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + } + if(src->is_keyval(src_node)) + _copy_props(dst_node, src, src_node); + else if(src->is_val(src_node)) + _copy_props_wo_key(dst_node, src, src_node); + else + C4_NEVER_REACH(); + } + else if(src->is_seq(src_node)) + { + if( ! is_seq(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_seq(dst_node, src->key(src_node)); + else + to_seq(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = append_child(dst_node); + _copy_props_wo_key(dch, src, sch); + merge_with(src, sch, dch); + } + } + else if(src->is_map(src_node)) + { + if( ! is_map(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_map(dst_node, src->key(src_node)); + else + to_map(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = find_child(dst_node, src->key(sch)); + if(dch == NONE) + { + dch = append_child(dst_node); + _copy_props(dch, src, sch); + } + merge_with(src, sch, dch); + } + } + else + { + C4_NEVER_REACH(); + } +} + + +//----------------------------------------------------------------------------- + +namespace detail { +/** @todo make this part of the public API, refactoring as appropriate + * to be able to use the same resolver to handle multiple trees (one + * at a time) */ +struct ReferenceResolver +{ + struct refdata + { + NodeType type; + size_t node; + size_t prev_anchor; + size_t target; + size_t parent_ref; + size_t parent_ref_sibling; + }; + + Tree *t; + /** from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. 
+ * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + stack<refdata> refs; + + ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks()) + { + resolve(); + } + + void store_anchors_and_refs() + { + // minimize (re-)allocations by counting first + size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id()); + if(!num_anchors_and_refs) + return; + refs.reserve(num_anchors_and_refs); + + // now descend through the hierarchy + _store_anchors_and_refs(t->root_id()); + + // finally connect the reference list + size_t prev_anchor = npos; + size_t count = 0; + for(auto &rd : refs) + { + rd.prev_anchor = prev_anchor; + if(rd.type.is_anchor()) + prev_anchor = count; + ++count; + } + }
+ + size_t count_anchors_and_refs(size_t n) + { + size_t c = 0; + c += t->has_key_anchor(n); + c += t->has_val_anchor(n); + c += t->is_key_ref(n); + c += t->is_val_ref(n); + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + c += count_anchors_and_refs(ch); + return c; + }
+ + void _store_anchors_and_refs(size_t n) + { + if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<")) + { + if(t->is_seq(n)) + { + // for merging multiple inheritance targets + // <<: [ *CENTER, *BIG ] + for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich)) + { + RYML_ASSERT(t->num_children(ich) == 0); + refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)}); + } + return; + } + if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs + { + RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n))); + refs.push({KEYREF, n, npos, npos, NONE, NONE}); + } + if(t->is_val_ref(n)) + { + RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n))); + refs.push({VALREF, n, npos, npos, NONE, NONE}); + } + } + if(t->has_key_anchor(n)) + { + RYML_CHECK(t->has_key(n)); + refs.push({KEYANCH, n, npos, npos, NONE, NONE}); + } + if(t->has_val_anchor(n)) + { + RYML_CHECK(t->has_val(n) || t->is_container(n)); + refs.push({VALANCH, n, npos, npos, NONE, NONE}); + } + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + { + _store_anchors_and_refs(ch); + } + }
+ + size_t lookup_(refdata *C4_RESTRICT ra) + { + RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); + RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); + csubstr refname; + if(ra->type.is_val_ref()) + { + refname = t->val_ref(ra->node); + } + else + { + RYML_ASSERT(ra->type.is_key_ref()); + refname = t->key_ref(ra->node); + } + while(ra->prev_anchor != npos) + { + ra = &refs[ra->prev_anchor]; + if(t->has_anchor(ra->node, refname)) + return ra->node; + } + + #ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 + #endif + + char errmsg[RYML_ERRMSG_SIZE]; + snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'", + static_cast<int>(refname.size()), refname.data()); + c4::yml::error(errmsg); + return NONE; + }
+ + void resolve() + { + store_anchors_and_refs(); + if(refs.empty()) + return; + + /* from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. + * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + for(size_t i = 0, e = refs.size(); i < e; ++i) + { + auto &C4_RESTRICT rd = refs.top(i); + if( ! 
rd.type.is_ref()) + continue; + rd.target = lookup_(&rd); + } + } + +}; // ReferenceResolver +} // namespace detail + +void Tree::resolve() +{ + if(m_size == 0) + return; + + detail::ReferenceResolver rr(this); + + // insert the resolved references + size_t prev_parent_ref = NONE; + size_t prev_parent_ref_after = NONE; + for(auto const& C4_RESTRICT rd : rr.refs) + { + if( ! rd.type.is_ref()) + continue; + if(rd.parent_ref != NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref)); + size_t after, p = parent(rd.parent_ref); + if(prev_parent_ref != rd.parent_ref) + { + after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling); + prev_parent_ref_after = after; + } + else + { + after = prev_parent_ref_after; + } + prev_parent_ref = rd.parent_ref; + prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else + { + if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<") + { + _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node)); + size_t p = parent(rd.node); + size_t after = prev_sibling(rd.node); + duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else if(rd.type.is_key_ref()) + { + _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node)); + _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target)); + if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_key.scalar = val(rd.target); + _add_flags(rd.node, KEY); + } + else + { + _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node)); + _p(rd.node)->m_key.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + } + else + { + _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref()); + if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_val.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + else + { + duplicate_contents(rd.target, rd.node); + } + } + } + } + + // clear anchors and refs + for(auto const& C4_RESTRICT ar : rr.refs) + { + rem_anchor_ref(ar.node); + if(ar.parent_ref != NONE) + if(type(ar.parent_ref) != NOTYPE) + remove(ar.parent_ref); + } + +} + +//----------------------------------------------------------------------------- + +size_t Tree::num_children(size_t node) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + ++count; + return count; +} + +size_t Tree::child(size_t node, size_t pos) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(count++ == pos) + return i; + } + return NONE; +} + +size_t Tree::child_pos(size_t node, size_t ch) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(i == ch) + return count; + ++count; + } + return npos; +} + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma GCC diagnostic ignored "-Wnull-dereference" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +size_t Tree::find_child(size_t node, csubstr const& name) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(node)); + if(get(node)->m_first_child == NONE) + { + 
_RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE); + return NONE; + } + else + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); + } + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(_p(i)->m_key.scalar == name) + { + return i; + } + } + return NONE; +} + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +//----------------------------------------------------------------------------- + +void Tree::to_val(size_t node, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); + _set_flags(node, VAL|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val = val; +} + +void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEYVAL|more_flags); + _p(node)->m_key = key; + _p(node)->m_val = val; +} + +void Tree::to_map(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys + _set_flags(node, MAP|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_map(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|MAP|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); + _set_flags(node, SEQ|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|SEQ|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_doc(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, DOC|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_stream(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); + _set_flags(node, STREAM|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + + +//----------------------------------------------------------------------------- +size_t Tree::num_tag_directives() const +{ + // this assumes we have a very small number of tag directives + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + if(m_tag_directives[i].handle.empty()) + return i; + return RYML_MAX_TAG_DIRECTIVES; +} + +void Tree::clear_tag_directives() +{ + for(TagDirective &td : m_tag_directives) + td = {}; +} + +size_t Tree::add_tag_directive(TagDirective const& td) +{ + _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); + _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); + _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!')); + _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!')); + // https://yaml.org/spec/1.2.2/#rule-ns-word-char + _RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); + size_t pos = num_tag_directives(); + _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); + m_tag_directives[pos] = td; + return pos; +} + +size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const +{ + // lookup from the end. We want to find the first directive that + // matches the tag and has a target node id leq than the given + // node_id. + for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i) + { + auto const& td = m_tag_directives[i]; + if(td.handle.empty()) + continue; + if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + { + _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len); + csubstr rest = tag.sub(td.handle.len); + size_t len = 1u + td.prefix.len + rest.len + 1u; + size_t numpc = rest.count('%'); + if(numpc == 0) + { + if(len <= output.len) + { + output.str[0] = '<'; + memcpy(1u + output.str, td.prefix.str, td.prefix.len); + memcpy(1u + output.str + td.prefix.len, rest.str, rest.len); + output.str[1u + td.prefix.len + rest.len] = '>'; + } + } + else + { + // need to decode URI % sequences + size_t pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + size_t delta = next - (pos+1); + len -= delta; + pos = rest.find('%', pos+1); + } while(pos != npos); + if(len <= output.len) + { + size_t prev = 0, wpos = 0; + auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; + auto appendchar = [&](char c) { output.str[wpos++] = c; }; + appendchar('<'); + appendstr(td.prefix); + pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + uint8_t val; + if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) + _RYML_CB_ERR(m_callbacks, "invalid URI character"); + appendstr(rest.range(prev, pos)); + appendchar((char)val); + prev = next; + pos = rest.find('%', pos+1); + } while(pos != npos); + _RYML_CB_ASSERT(m_callbacks, pos == npos); + _RYML_CB_ASSERT(m_callbacks, prev > 0); + _RYML_CB_ASSERT(m_callbacks, rest.len >= prev); + appendstr(rest.sub(prev)); + appendchar('>'); + _RYML_CB_ASSERT(m_callbacks, wpos == 
len); + } + } + return len; + } + } + return 0; // return 0 to signal that the tag is local and cannot be resolved +} + +namespace { +csubstr _transform_tag(Tree *t, csubstr tag, size_t node) +{ + size_t required_size = t->resolve_tag(substr{}, tag, node); + if(!required_size) + return tag; + const char *prev_arena = t->arena().str; + substr buf = t->alloc_arena(required_size); + _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); + size_t actual_size = t->resolve_tag(buf, tag, node); + _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); + return buf.first(actual_size); +} +void _resolve_tags(Tree *t, size_t node) +{ + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child)); + if(t->has_val(child) && t->has_val_tag(child)) + t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child)); + _resolve_tags(t, child); + } +} +size_t _count_resolved_tags_size(Tree const* t, size_t node) +{ + size_t sz = 0; + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + sz += t->resolve_tag(substr{}, t->key_tag(child), child); + if(t->has_val(child) && t->has_val_tag(child)) + sz += t->resolve_tag(substr{}, t->val_tag(child), child); + sz += _count_resolved_tags_size(t, child); + } + return sz; +} +} // namespace + +void Tree::resolve_tags() +{ + if(empty()) + return; + if(num_tag_directives() == 0) + return; + size_t needed_size = _count_resolved_tags_size(this, root_id()); + if(needed_size) + reserve_arena(arena_size() + needed_size); + _resolve_tags(this, root_id()); +} + + +//----------------------------------------------------------------------------- + +csubstr Tree::lookup_result::resolved() const +{ + csubstr p = path.first(path_pos); + if(p.ends_with('.')) + p = p.first(p.len-1); + return p; +} + +csubstr Tree::lookup_result::unresolved() const +{ + return path.sub(path_pos); +} + +void Tree::_advance(lookup_result *r, size_t more) const +{ + r->path_pos += more; + if(r->path.sub(r->path_pos).begins_with('.')) + ++r->path_pos; +} + +Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + if(path.empty()) + return r; + _lookup_path(&r); + if(r.target == NONE && r.closest == start) + r.closest = NONE; + return r; +} + +size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + if(parent_is_map(target)) + to_keyval(target, key(target), default_value); + else + to_val(target, default_value); + return target; +} + +size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + merge_with(src, src_node, target); + return target; +} + +size_t Tree::_lookup_path_or_create(csubstr path, size_t start) +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + _lookup_path(&r); + if(r.target != NONE) + { + C4_ASSERT(r.unresolved().empty()); + return r.target; + } + _lookup_path_modify(&r); + return r.target; +} + +void Tree::_lookup_path(lookup_result *r) const +{ + C4_ASSERT( ! 
r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +void Tree::_lookup_path_modify(lookup_result *r) +{ + C4_ASSERT( ! r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node_modify(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + csubstr prev = token.value; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = find_child(r->closest, token.value); + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx = 0; + _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); + node = child(r->closest, idx); + } + else + { + C4_NEVER_REACH(); + } + + if(node != NONE) + { + *parent = token; + } + else + { + csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); + r->path_pos -= prev.len; + if(p.begins_with('.')) + r->path_pos -= 1u; + } + + return node; +} + +size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + if( ! is_container(r->closest)) + { + if(has_key(r->closest)) + to_map(r->closest, key(r->closest)); + else + to_map(r->closest); + } + else + { + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + else + { + size_t pos = NONE; + _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); + _RYML_CB_ASSERT(m_callbacks, pos != NONE); + node = child(r->closest, pos); + } + } + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = append_child(r->closest); + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_type.add(KEY); + } + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + { + node = find_child(r->closest, token.value); + if(node == NONE) + node = append_child(r->closest); + } + else + { + _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest)); + _add_flags(r->closest, MAP); + node = append_child(r->closest); + } + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_val.scalar = ""; + n->m_type.add(KEYVAL); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx; + if( ! 
from_chars(token.value, &idx)) + return NONE; + if( ! is_container(r->closest)) + { + if(has_key(r->closest)) + { + csubstr k = key(r->closest); + _clear_type(r->closest); + to_seq(r->closest, k); + } + else + { + _clear_type(r->closest); + to_seq(r->closest); + } + } + _RYML_CB_ASSERT(m_callbacks, is_container(r->closest)); + node = child(r->closest, idx); + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); + for(size_t i = num_children(r->closest); i <= idx; ++i) + { + node = append_child(r->closest); + if(i < idx) + { + if(is_map(r->closest)) + to_keyval(node, /*"~"*/{}, /*"~"*/{}); + else if(is_seq(r->closest)) + to_val(node, /*"~"*/{}); + } + } + } + } + else + { + C4_NEVER_REACH(); + } + + _RYML_CB_ASSERT(m_callbacks, node != NONE); + *parent = token; + return node; +} + +/** types of tokens: + * - seeing "map." ---> "map"/MAP + * - finishing "scalar" ---> "scalar"/KEYVAL + * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY) + * - seeing "[n]" ---> "[n]"/KEY + */ +Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const +{ + csubstr unres = r->unresolved(); + if(unres.empty()) + return {}; + + // is it an indexation like [0], [1], etc? + if(unres.begins_with('[')) + { + size_t pos = unres.find(']'); + if(pos == csubstr::npos) + return {}; + csubstr idx = unres.first(pos + 1); + _advance(r, pos + 1); + return {idx, KEY}; + } + + // no. so it must be a name + size_t pos = unres.first_of(".["); + if(pos == csubstr::npos) + { + _advance(r, unres.len); + NodeType t; + if(( ! parent) || parent.type.is_seq()) + return {unres, VAL}; + return {unres, KEYVAL}; + } + + // it's either a map or a seq + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' || unres[pos] == '['); + if(unres[pos] == '.') + { + _RYML_CB_ASSERT(m_callbacks, pos != 0); + _advance(r, pos + 1); + return {unres.first(pos), MAP}; + } + + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '['); + _advance(r, pos); + return {unres.first(pos), SEQ}; +} + + +} // namespace ryml +} // namespace c4 + + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_MSVC_POP + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//#include "c4/yml/parse.hpp" +#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_) +#error "amalgamate: file c4/yml/parse.hpp must have been included at this point" +#endif /* C4_YML_PARSE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp +//#include "c4/utf.hpp" +#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_) +#error "amalgamate: file c4/utf.hpp must have been included at this point" +#endif /* 
C4_UTF_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include <c4/dump.hpp> +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + + +//included above: +//#include +//included above: +//#include +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//#include "c4/yml/detail/print.hpp" +#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) +#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ + +#endif
+ +#ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 +#endif + +//#define RYML_WITH_TAB_TOKENS +#ifdef RYML_WITH_TAB_TOKENS +#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with +#else +#define _RYML_WITH_TAB_TOKENS(...) +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without +#endif + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wduplicated-branches" +# endif +#endif + +namespace c4 { +namespace yml { + +namespace {
+ +template<class DumpFn, class ...Args> +void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args) +{ + char writebuf[256]; + auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward<Args>(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward<Args>(args)...); + } + } +}
+ +bool _is_scalar_next__runk(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ") || s.begins_with(":\"") || s.begins_with(":'")); +} + +bool _is_scalar_next__rseq_rval(csubstr s) +{ + return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-"); +} + +bool _is_scalar_next__rmap(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? 
") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t"))); +} + +bool _is_scalar_next__rmap_val(csubstr s) +{ + return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-"); +} + +bool _is_doc_sep(csubstr s) +{ + constexpr const csubstr dashes = "---"; + constexpr const csubstr ellipsis = "..."; + constexpr const csubstr whitesp = " \t"; + if(s.begins_with(dashes)) + return s == dashes || s.sub(3).begins_with_any(whitesp); + else if(s.begins_with(ellipsis)) + return s == ellipsis || s.sub(3).begins_with_any(whitesp); + return false; +} + +/** @p i is set to the first non whitespace character after the line + * @return the number of empty lines after the initial position */ +size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + { + ++numnl_following; + if(indentation) // skip the indentation after the newline + { + size_t stop = *i + indentation; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] != ' ' && r.str[*i] != '\r') + break; + RYML_ASSERT(*i < stop); + } + C4_UNUSED(stop); + } + } + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') // skip leading whitespace + ; + else + break; + } + return numnl_following; +} + +} // anon namespace + + +//----------------------------------------------------------------------------- + +Parser::~Parser() +{ + _free(); + _clr(); +} + +Parser::Parser(Callbacks const& cb, ParserOptions opts) + : m_options(opts) + , m_file() + , m_buf() + , m_root_id(NONE) + , m_tree() + , m_stack(cb) + , m_state() + , m_key_tag_indentation(0) + , m_key_tag2_indentation(0) + , m_key_tag() + , m_key_tag2() + , m_val_tag_indentation(0) + , m_val_tag() + , m_key_anchor_was_before(false) + , m_key_anchor_indentation(0) + , m_key_anchor() + , m_val_anchor_indentation(0) + , m_val_anchor() + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size(0) + , m_newline_offsets_capacity(0) + , m_newline_offsets_buf() +{ + m_stack.push(State{}); + m_state = &m_stack.top(); +} + +Parser::Parser(Parser &&that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(std::move(that.m_stack)) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena(that.m_filter_arena) + , m_newline_offsets(that.m_newline_offsets) + , m_newline_offsets_size(that.m_newline_offsets_size) + , m_newline_offsets_capacity(that.m_newline_offsets_capacity) + , m_newline_offsets_buf(that.m_newline_offsets_buf) +{ + that._clr(); +} + +Parser::Parser(Parser const& that) + : m_options(that.m_options) + , m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(that.m_stack) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , 
m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size() + , m_newline_offsets_capacity() + , m_newline_offsets_buf() +{ + if(that.m_newline_offsets_capacity) + { + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + } + if(that.m_filter_arena.len) + { + _resize_filter_arena(that.m_filter_arena.len); + } +} + +Parser& Parser::operator=(Parser &&that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = std::move(that.m_stack); + m_state = (&m_stack.top()); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = (that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + m_filter_arena = that.m_filter_arena; + m_newline_offsets = (that.m_newline_offsets); + m_newline_offsets_size = (that.m_newline_offsets_size); + m_newline_offsets_capacity = (that.m_newline_offsets_capacity); + m_newline_offsets_buf = (that.m_newline_offsets_buf); + that._clr(); + return *this; +} + +Parser& Parser::operator=(Parser const& that) +{ + _free(); + m_options = (that.m_options); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = that.m_stack; + m_state = &m_stack.top(); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = (that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + if(that.m_filter_arena.len > 0) + _resize_filter_arena(that.m_filter_arena.len); + if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + m_newline_offsets_buf = that.m_newline_offsets_buf; + return *this; +} + +void Parser::_clr() +{ + m_options = {}; + m_file = {}; + m_buf = {}; + m_root_id = {}; + m_tree = {}; + m_stack.clear(); + 
m_state = {}; + m_key_tag_indentation = {}; + m_key_tag2_indentation = {}; + m_key_tag = {}; + m_key_tag2 = {}; + m_val_tag_indentation = {}; + m_val_tag = {}; + m_key_anchor_was_before = {}; + m_key_anchor_indentation = {}; + m_key_anchor = {}; + m_val_anchor_indentation = {}; + m_val_anchor = {}; + m_filter_arena = {}; + m_newline_offsets = {}; + m_newline_offsets_size = {}; + m_newline_offsets_capacity = {}; + m_newline_offsets_buf = {}; +} + +void Parser::_free() +{ + if(m_newline_offsets) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = nullptr; + m_newline_offsets_size = 0u; + m_newline_offsets_capacity = 0u; + m_newline_offsets_buf = 0u; + } + if(m_filter_arena.len) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + m_filter_arena = {}; + } + m_stack._free(); +} + + +//----------------------------------------------------------------------------- +void Parser::_reset() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1); + m_stack.clear(); + m_stack.push({}); + m_state = &m_stack.top(); + m_state->reset(m_file.str, m_root_id); + + m_key_tag_indentation = 0; + m_key_tag2_indentation = 0; + m_key_tag.clear(); + m_key_tag2.clear(); + m_val_tag_indentation = 0; + m_val_tag.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + m_key_anchor.clear(); + m_val_anchor_indentation = 0; + m_val_anchor.clear(); + + if(m_options.locations()) + { + _prepare_locations(); + } +} + +//----------------------------------------------------------------------------- +template +void Parser::_fmt_msg(DumpFn &&dumpfn) const +{ + auto const& lc = m_state->line_contents; + csubstr contents = lc.stripped; + if(contents.len) + { + // print the yaml src line + size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col); + if(m_file.len) + { + _parse_dump(dumpfn, "{}:", m_file); + offs += m_file.len + 1; + } + _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col); + csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); + csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); + _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + // highlight the remaining portion of the previous line + size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); + size_t lastcol = firstcol + lc.rem.len; + for(size_t i = 0; i < offs + firstcol; ++i) + dumpfn(" "); + dumpfn("^"); + for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) + dumpfn("~"); + _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + } + else + { + dumpfn("\n"); + } + +#ifdef RYML_DBG + // next line: print the state flags + { + char flagbuf_[64]; + _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags)); + } +#endif +} + + +//----------------------------------------------------------------------------- +template +void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + char errmsg[RYML_ERRMSG_SIZE]; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + _parse_dump(dumpfn, fmt, args...); + writer.append('\n'); + _fmt_msg(dumpfn); + size_t len = writer.pos < RYML_ERRMSG_SIZE ? 
writer.pos : RYML_ERRMSG_SIZE; + m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data); +} + +//----------------------------------------------------------------------------- +#ifdef RYML_DBG +template +void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); }; + _parse_dump(dumpfn, fmt, args...); + dumpfn("\n"); + _fmt_msg(dumpfn); +} +#endif + +//----------------------------------------------------------------------------- +bool Parser::_finished_file() const +{ + bool ret = m_state->pos.offset >= m_buf.len; + if(ret) + { + _c4dbgp("finished file!!!"); + } + return ret; +} + +//----------------------------------------------------------------------------- +bool Parser::_finished_line() const +{ + return m_state->line_contents.rem.empty(); +} + +//----------------------------------------------------------------------------- +void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id) +{ + m_file = file; + m_buf = buf; + m_root_id = node_id; + m_tree = t; + _reset(); + while( ! _finished_file()) + { + _scan_line(); + while( ! _finished_line()) + _handle_line(); + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); + } + _handle_finished_file(); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_finished_file() +{ + _end_stream(); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_line() +{ + _c4dbgq("\n-----------"); + _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty()); + if(has_any(RSEQ)) + { + if(has_any(FLOW)) + { + if(_handle_seq_flow()) + return; + } + else + { + if(_handle_seq_blck()) + return; + } + } + else if(has_any(RMAP)) + { + if(has_any(FLOW)) + { + if(_handle_map_flow()) + return; + } + else + { + if(_handle_map_blck()) + return; + } + } + else if(has_any(RUNK)) + { + if(_handle_unk()) + return; + } + + if(_handle_top()) + return; +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_unk() +{ + _c4dbgp("handle_unk"); + + csubstr rem = m_state->line_contents.rem; + const bool start_as_child = (node(m_state) == nullptr); + + if(C4_UNLIKELY(has_any(NDOC))) + { + if(rem == "---" || rem.begins_with("--- ")) + { + _start_new_doc(rem); + return true; + } + auto trimmed = rem.triml(' '); + if(trimmed == "---" || trimmed.begins_with("--- ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len); + _line_progressed(rem.len - trimmed.len); + _start_new_doc(trimmed); + _save_indentation(); + return true; + } + else if(trimmed.begins_with("...")) + { + _end_stream(); + } + else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag + { + _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem); + size_t indref = m_state->indref; + _push_level(); + _start_doc(); + _set_indentation(indref); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty()); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + if(m_state->indref > 0) + { + csubstr ws = rem.left_of(rem.first_not_of(' ')); + if(m_state->indref <= ws.len) + { + _c4dbgpf("skipping base indentation of {}", m_state->indref); + _line_progressed(m_state->indref); + rem = rem.sub(m_state->indref); + } + } + + 
if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgpf("it's a seq, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_seq(start_as_child); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgpf("it's a map, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_map(start_as_child); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + addrem_flags(RKEY|QMRK, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(": ") && !has_all(SSCL)) + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == ':' && !has_all(SSCL)) + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(!rem.begins_with('*') && _handle_key_anchors_and_refs()) + { + return true; + } + else if(has_all(SSCL)) + { + _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar); + + csubstr saved_scalar; + bool is_quoted; + if(_scan_scalar_unk(&saved_scalar, &is_quoted)) + { + rem = m_state->line_contents.rem; + _c4dbgpf("... and there's also a scalar next! 
'{}'", saved_scalar); + if(rem.begins_with_any(" \t")) + { + size_t n = rem.first_not_of(" \t"); + _c4dbgpf("skipping {} spaces/tabs", n); + rem = rem.sub(n); + _line_progressed(n); + } + } + + _c4dbgpf("rem='{}'", rem); + + if(rem.begins_with(", ")) + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(2); + } + else if(rem.begins_with(',')) + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(1); + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child); + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(2); + } + else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'")) + { + if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); } + else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); } + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(1); // advance only 1 + } + else if(rem.begins_with('}')) + { + if(!has_all(RMAP|FLOW)) + { + _c4err("invalid token: not reading a map"); + } + if(!has_all(SSCL)) + { + _c4err("no scalar stored"); + } + _append_key_val(saved_scalar); + _stop_map(); + _line_progressed(1); + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + } + else if(rem.begins_with('#')) + { + _c4dbgpf("it's a comment: '{}'", rem); + _scan_comment(); + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(" ") || rem.begins_with("\t")) + { + size_t n = rem.first_not_of(" \t"); + if(n == npos) + n = rem.len; + _c4dbgpf("has {} spaces/tabs, skip...", n); + _line_progressed(n); + return true; + } + else if(rem.empty()) + { + // nothing to do + } + else if(rem == "---" || rem.begins_with("--- ")) + { + _c4dbgp("caught ---: starting doc"); + _start_new_doc(rem); + return true; + } + else if(rem.begins_with('%')) + { + _c4dbgp("caught a directive: ignoring..."); + _line_progressed(rem.len); + return true; + } + else + { + _c4err("parse error"); + } + + if( ! saved_scalar.empty()) + { + _store_scalar(saved_scalar, is_quoted); + } + + return true; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL)); + csubstr scalar; + size_t indentation = m_state->line_contents.indentation; // save + bool is_quoted; + if(_scan_scalar_unk(&scalar, &is_quoted)) + { + _c4dbgpf("got a {} scalar", is_quoted ? 
"quoted" : ""); + rem = m_state->line_contents.rem; + { + size_t first = rem.first_not_of(" \t"); + if(first && first != npos) + { + _c4dbgpf("skip {} whitespace characters", first); + _line_progressed(first); + rem = rem.sub(first); + } + } + _store_scalar(scalar, is_quoted); + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(2); // call this AFTER saving the indentation + } + else if(rem == ":") + { + _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(1); // call this AFTER saving the indentation + } + else + { + // we still don't know whether it's a seq or a map + // so just store the scalar + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + csubstr ws = rem.left_of(rem.first_not_of(" \t")); + rem = rem.right_of(ws); + if(has_all(RTOP) && rem.begins_with("---")) + { + _c4dbgp("there's a doc starting, and it's indented"); + _set_indentation(ws.len); + } + _c4dbgpf("skipping {} spaces/tabs", ws.len); + _line_progressed(ws.len); + return true; + } + } + + return false; +} + + +//----------------------------------------------------------------------------- +C4_ALWAYS_INLINE void Parser::_skipchars(char c) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c)); + size_t pos = m_state->line_contents.rem.first_not_of(c); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} '{}'", pos, c); + _line_progressed(pos); +} + +template +C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N]) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars)); + size_t pos = m_state->line_contents.rem.first_not_of(chars); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} characters", pos); + _line_progressed(pos); +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_seq_flow() +{ + _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with(']')) + { + _c4dbgp("end the sequence"); + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + { + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + bool is_quoted; + if(_scan_scalar_seq_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL); + _append_val(rem, is_quoted); + return true; + } + else if(rem.begins_with('[')) + { + 
_c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgpf("found '? ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == ""); + addrem_flags(QMRK|RKEY, RVAL|SSCL); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(", ")) + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('\t')) + { + _skipchars('\t'); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + if(rem.begins_with(", ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else + { + _c4err("was expecting a comma"); + } + } + else + { + _c4err("internal error"); + } + + return true; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_seq_blck() +{ + _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + if(_handle_indentation()) + return true; + + if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + 
return true; + } + else if(rem == '-') + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + _skipchars(" \t"); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("got document start '---'"); + _start_new_doc(rem); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + // there can be empty values + if(_handle_indentation()) + return true; + + csubstr s; + bool is_quoted; + if(_scan_scalar_seq_blck(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + + rem = m_state->line_contents.rem; + if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' '))) + { + _c4dbgp("skipping whitespace..."); + size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(skip == csubstr::npos) + skip = rem.len; // maybe the line is just whitespace + _line_progressed(skip); + rem = rem.sub(skip); + } + + _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem); + if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + if(m_key_anchor.empty()) + _move_val_anchor_to_key_anchor(); + if(m_key_tag.empty()) + _move_val_tag_to_key_tag(); + addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _start_map(); + _store_scalar(s, is_quoted); + if( ! _maybe_set_indentation_from_anchor_or_tag()) + { + _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col); + _set_indentation(m_state->scalar_col); // this is the column where the scalar starts + } + _move_key_tag2_to_key_tag(); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else + { + _c4dbgp("appending val to current seq"); + _append_val(s, is_quoted); + addrem_flags(RNXT, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(2); + return true; + } + else if(rem == '-') + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgp("val is a child map + this key is complex"); + addrem_flags(RNXT, RVAL); // before _push_level! 
+ _push_level(); + _start_map(); + addrem_flags(QMRK|RKEY, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + /* pathological case: + * - &key : val + * - &key : + * - : val + */ + else if((!has_all(SSCL)) && + (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":")) + { + if(!m_val_anchor.empty() || !m_val_tag.empty()) + { + _c4dbgp("val is a child map + this key is empty, with anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! + _move_val_tag_to_key_tag(); + _move_val_anchor_to_key_anchor(); + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + else + { + _c4dbgp("val is a child map + this key is empty, no anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! + size_t ind = m_state->indref; + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _c4dbgpf("set indentation from map anchor: {}", ind + 2); + _set_indentation(ind + 2); // this is the column where the map starts + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + } + else + { + _c4err("parse error"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- + +bool Parser::_rval_dash_start_or_continue_seq() +{ + size_t ind = m_state->line_contents.current_col(); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref); + size_t delta_ind = ind - m_state->indref; + if( ! delta_ind) + { + _c4dbgp("prev val was empty"); + addrem_flags(RNXT, RVAL); + _append_val_null(&m_state->line_contents.full[ind]); + return false; + } + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RNXT, RVAL); // before _push_level! 
+ _push_level(); + _start_seq(); + _save_indentation(); + return true; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_map_flow() +{ + // explicit flow, ie, inside {}, separated by commas + _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("end the map"); + if(has_all(SSCL)) + { + _c4dbgp("the last val was null"); + _append_key_val_null(rem.str - 1); + rem_flags(RVAL); + } + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP)); + + if(rem.begins_with(", ")) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + bool is_quoted; + if(has_none(SSCL) && _scan_scalar_map_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + _store_scalar(rem, is_quoted); + rem = m_state->line_contents.rem; + csubstr trimmed = rem.triml(" \t"); + if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str); + size_t num = static_cast(trimmed.str - rem.str); + _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num)); + rem = rem.sub(num); + _line_progressed(num); + } + } + + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(2); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem == ':') + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('?')) + { + _c4dbgp("complex key"); + add_flags(QMRK); + _line_progressed(1); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("prev scalar was a key with null value"); + _append_key_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("map terminates after a key..."); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + 
_c4dbgp("the last val was null"); + _append_key_val_null(rem.str - 1); + rem_flags(RVAL); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + _pop_level(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem == "") + { + return true; + } + else + { + size_t pos = rem.first_not_of(" \t"); + if(pos == csubstr::npos) + pos = 0; + rem = rem.sub(pos); + if(rem.begins_with(':')) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(pos + 1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + _line_progressed(pos); + rem = _scan_comment(); // also progresses the line + return true; + } + else + { + _c4err("parse error"); + } + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + bool is_quoted; + if(_scan_scalar_map_flow(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL|RKEY); + _append_key_val(rem, is_quoted); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RNXT|RVAL); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("appending empty val"); + _append_key_val_null(rem.str - 1); + addrem_flags(RKEY, RVAL); + _line_progressed(1); + if(has_any(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else if(has_any(RSEQIMAP) && rem.begins_with(']')) + { + _c4dbgp("stopping implicitly nested 1x map"); + if(has_any(SSCL)) + { + _append_key_val_null(rem.str - 1); + } + _stop_seqimap(); + _pop_level(); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + +//----------------------------------------------------------------------------- +bool Parser::_handle_map_blck() +{ + _c4dbgpf("handle_map_blck: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + // actually, we don't need RNXT in indent-based maps. 
+ addrem_flags(RKEY, RNXT); + } + + if(_handle_indentation()) + { + _c4dbgp("indentation token"); + return true; + } + + if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + _c4dbgp("RMAP|RKEY read scalar?"); + bool is_quoted; + if(_scan_scalar_map_blck(&rem, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + if(has_all(QMRK|SSCL)) + { + _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar); + _append_key_val_null(rem.str - 1); + } + _store_scalar(rem, is_quoted); + if(has_all(QMRK|RSET)) + { + _c4dbgp("it's a complex key, so use null value '~'"); + _append_key_val_null(rem.str); + } + rem = m_state->line_contents.rem; + + if(rem.begins_with(':')) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + rem = rem.left_of(rem.first_not_of(" \t")); + _c4dbgpf("skip {} spaces/tabs", rem.len); + _line_progressed(rem.len); + } + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + size_t pos = rem.first_not_of(" \t"); + if(pos == npos) + pos = rem.len; + _c4dbgpf("skip {} spaces/tabs", pos); + _line_progressed(pos); + return true; + } + else if(rem == '?' || rem.begins_with("? ")) + { + _c4dbgp("it's a complex key"); + _line_progressed(rem.begins_with("? ") ? 2u : 1u); + if(has_any(SSCL)) + _append_key_val_null(rem.str - 1); + add_flags(QMRK); + return true; + } + else if(has_all(QMRK) && rem.begins_with(':')) + { + _c4dbgp("complex key finished"); + if(!has_any(SSCL)) + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with(' ')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + _skipchars(' '); + } + return true; + } + else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("key finished"); + if(!has_all(SSCL)) + { + _c4dbgp("key was empty..."); + _store_scalar_null(rem.str); + rem_flags(QMRK); + } + addrem_flags(RVAL, RKEY); + _line_progressed(rem == ':' ? 1 : 2); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("start new document '---'"); + _start_new_doc(rem); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + + _c4dbgp("RMAP|RVAL read scalar?"); + csubstr s; + bool is_quoted; + if(_scan_scalar_map_blck(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + + rem = m_state->line_contents.rem; + + if(rem.begins_with(": ")) + { + _c4dbgp("actually, the scalar is the first key of a map"); + addrem_flags(RKEY, RVAL); // before _push_level! 
This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(m_state->scalar_col); + addrem_flags(RVAL, RKEY); + _line_progressed(2); + } + else if(rem.begins_with(':')) + { + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(/*behind*/s.len); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else + { + _c4dbgp("appending keyval to current map"); + _append_key_val(s, is_quoted); + addrem_flags(RKEY, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(); + _move_scalar_from_top(); + _start_seq(); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgp("maybe a seq. start unknown, indented"); + _start_unk(); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t")) + { + _start_new_doc(rem); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_top() +{ + _c4dbgp("handle_top"); + csubstr rem = m_state->line_contents.rem; + + if(rem.begins_with('#')) + { + _c4dbgp("a comment line"); + _scan_comment(); + return true; + } + + csubstr trimmed = rem.triml(' '); + + if(trimmed.begins_with('%')) + { + _handle_directive(trimmed); + _line_progressed(rem.len); + return true; + } + else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t")) + { + _start_new_doc(rem); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + _save_indentation(); + } + return true; + } + else if(trimmed.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + } + _line_progressed(3); + return true; + } + else 
+ { + _c4err("parse error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_key_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL)); + const csubstr rem = m_state->line_contents.rem; + if(rem.begins_with('&')) + { + _c4dbgp("found a key anchor!!!"); + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this anchor is for the next element"); + _append_key_val_null(rem.str - 1); + rem_flags(QMRK); + return true; + } + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _move_key_anchor_to_val_anchor(); + _c4dbgpf("key anchor value: '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +bool Parser::_handle_val_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY)); + const csubstr rem = m_state->line_contents.rem; + if(rem.begins_with('&')) + { + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem)); + if(m_val_anchor.empty()) + { + _c4dbgpf("save val anchor: '{}'", anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor); + if(m_tree->is_seq(m_state->node_id)) + { + if(m_tree->has_children(m_state->node_id)) + { + _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id)); + _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("current node={} is a seq, has no children", m_state->node_id); + if(m_tree->has_val_anchor(m_state->node_id)) + { + _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id)); + _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("... 
so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + } + } + } + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +void Parser::_move_key_anchor_to_val_anchor() +{ + if(m_key_anchor.empty()) + return; + _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor); + if(!m_val_anchor.empty()) + _c4err("triple-pending anchor"); + m_val_anchor = m_key_anchor; + m_val_anchor_indentation = m_key_anchor_indentation; + m_key_anchor = {}; + m_key_anchor_indentation = {}; +} + +void Parser::_move_val_anchor_to_key_anchor() +{ + if(m_val_anchor.empty()) + return; + if(!_token_is_from_this_line(m_val_anchor)) + return; + _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor); + if(!m_key_anchor.empty()) + _c4err("triple-pending anchor"); + m_key_anchor = m_val_anchor; + m_key_anchor_indentation = m_val_anchor_indentation; + m_val_anchor = {}; + m_val_anchor_indentation = {}; +} + +void Parser::_move_key_tag_to_val_tag() +{ + if(m_key_tag.empty()) + return; + _c4dbgpf("move key tag to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag); + m_val_tag = m_key_tag; + m_val_tag_indentation = m_key_tag_indentation; + m_key_tag.clear(); + m_key_tag_indentation = 0; +} + +void Parser::_move_val_tag_to_key_tag() +{ + if(m_val_tag.empty()) + return; + if(!_token_is_from_this_line(m_val_tag)) + return; + _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag); + m_key_tag = m_val_tag; + m_key_tag_indentation = m_val_tag_indentation; + m_val_tag.clear(); + m_val_tag_indentation = 0; +} + +void Parser::_move_key_tag2_to_key_tag() +{ + if(m_key_tag2.empty()) + return; + _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2); + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_types() +{ + csubstr rem = m_state->line_contents.rem.triml(' '); + csubstr t; + + if(rem.begins_with("!!")) + { + _c4dbgp("begins with '!!'"); + t = rem.left_of(rem.first_of(" ,")); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2); + if(t == "!!set") + add_flags(RSET); + } + else if(rem.begins_with("!<")) + { + _c4dbgp("begins with '!<'"); + t = rem.left_of(rem.first_of('>'), true); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2, t.len-1); + } + else if(rem.begins_with("!h!")) + { + _c4dbgp("begins with '!h!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3); + //t = t.sub(3); + } + else if(rem.begins_with('!')) + { + _c4dbgp("begins with '!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + //t = t.sub(1); + } + + if(t.empty()) + return false; + + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this tag is for the next element"); + _append_key_val_null(rem.str - 1); + rem_flags(QMRK); + } + + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + const char *tag_beginning = rem.str; + #endif + size_t 
tag_indentation = m_state->line_contents.current_col(t); + _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin()); + _line_progressed(static_cast(t.end() - m_state->line_contents.rem.begin())); + { + size_t pos = m_state->line_contents.rem.first_not_of(" \t"); + if(pos != csubstr::npos) + _line_progressed(pos); + } + + if(has_all(RMAP|RKEY)) + { + _c4dbgpf("saving map key tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty()); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else if(has_all(RMAP|RVAL)) + { + /* foo: !!str + * !!str : bar */ + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + _c4dbgpf("rem='{}'", rem); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(rem == ':' || rem.begins_with(": ")) + { + _c4dbgp("the last val was null, and this is a tag from a null key"); + _append_key_val_null(tag_beginning - 1); + _store_scalar_null(rem.str - 1); + // do not change the flag to key, it is ~ + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin()); + size_t token_len = rem == ':' ? 1 : 2; + _line_progressed(static_cast(token_len + rem.begin() - m_state->line_contents.rem.begin())); + } + #endif + _c4dbgpf("saving map val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC)) + { + if(m_val_tag.empty()) + { + _c4dbgpf("saving seq/doc val tag '{}'", t); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving seq/doc key tag '{}'", t); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + } + else if(has_all(RTOP|RUNK) || has_any(RUNK)) + { + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + if(rem.empty()) + { + _c4dbgpf("saving val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving key tag '{}'", t); + if(m_key_tag.empty()) + { + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else + { + /* handle this case: + * !!str foo: !!map + * !!int 1: !!float 20.0 + * !!int 3: !!float 40.0 + * + * (m_key_tag would be !!str and m_key_tag2 would be !!int) + */ + m_key_tag2 = t; + m_key_tag2_indentation = tag_indentation; + } + } + } + else + { + _c4err("internal error"); + } + + if(m_val_tag.not_empty()) + { + YamlTag_e tag = to_tag(t); + if(tag == TAG_STR) + { + _c4dbgpf("tag '{}' is a str-type tag", t); + if(has_all(RTOP|RUNK|NDOC)) + { + _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset); + csubstr scalar = _slurp_doc_scalar(); + _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar); + m_tree->to_val(m_state->node_id, scalar, DOC); + _c4dbgpf("docval. 
val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + if(!m_val_anchor.empty()) + { + _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor.clear(); + } + _end_stream(); + } + } + } + return true; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_slurp_doc_scalar() +{ + csubstr s = m_state->line_contents.rem; + size_t pos = m_state->pos.offset; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos); + _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(s.len == 0) + { + _line_ended(); + _scan_line(); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + } + + size_t skipws = s.first_not_of(" \t"); + _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(skipws != npos) + { + _line_progressed(skipws); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty()); + _handle_val_anchors_and_refs(); + if(!m_val_anchor.empty()) + { + s = m_state->line_contents.rem; + skipws = s.first_not_of(" \t"); + if(skipws != npos) + { + _line_progressed(skipws); + } + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + if(s.begins_with('\'')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_squot_scalar(); + } + else if(s.begins_with('"')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_dquot_scalar(); + } + else if(s.begins_with('|') || s.begins_with('>')) + { + return _scan_block(); + } + + _c4dbgpf("slurp 4 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); + _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); + + _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + if(_at_line_end()) + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_scan_scalar_seq_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
has_any(FLOW)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_map_blck(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _c4dbgp("_scan_scalar_map_blck"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + + csubstr s = m_state->line_contents.rem; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + if(s.len == 0) + return false; + #endif + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + if( ! _is_scalar_next__rmap(s)) + return false; + + size_t colon_token = s.find(": "); + if(colon_token == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_token = s.find(":\t"); + if(colon_token == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; + } + , + // without tab tokens + colon_token = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_token != s.len-1) + colon_token = npos; + ) + } + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + if(s.begins_with("? 
") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _c4dbgp("RMAP|RKEY"); + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + if(s.begins_with("? ") || s == '?') + return false; + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + { + return false; + } + else if(s.begins_with("...")) + { + return false; + } + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS( + else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RMAP|RVAL: scalar"); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + return false; + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED__OR_REFACTORED + else if(s.begins_with("...")) + return false; + #endif + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_seq_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(RKEY)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + + if(has_all(RVAL)) + { + _c4dbgp("RSEQ|RVAL"); + if( ! _is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RSEQ|RVAL|FLOW"); + s = s.left_of(s.first_of(",]")); + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. 
curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_map_flow(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(FLOW)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY|RVAL)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + + if( ! _is_scalar_next__rmap(s)) + return false; + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + size_t colon_token = s.find(": "); + if(colon_token == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_token = s.find(":\t"); + if(colon_token == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_token = s.find(':'); + if(colon_token != s.len-1) + colon_token = npos; + } + , + // without tab tokens + colon_token = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_token != s.len-1) + colon_token = npos; + ) + } + if(s.begins_with("? ") || s == '?') + return false; + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + s = s.left_of(colon_token); + s = s.left_of(s.first_of("#")); + s = s.left_of(s.first_of(':')); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + _c4dbgp("RMAP|RKEY"); + s = s.left_of(colon_token); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + _c4dbgpf("RMAP|RKEY|FLOW: '{}'", s); + s = s.left_of(s.first_of(",}")); + if(s.ends_with(':')) + --s.len; + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + _c4dbgp("RMAP|RVAL|FLOW"); + if(has_none(RSEQIMAP)) + s = s.left_of(s.first_of(",}")); + else + s = s.left_of(s.first_of(",]")); + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. 
curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +bool Parser::_scan_scalar_unk(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RUNK)); + + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = true; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + + _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); + if( ! _is_scalar_next__runk(s)) + { + _c4dbgp("RUNK: no scalar next"); + return false; + } + size_t pos = s.find(" #"); + if(pos != npos) + s = s.left_of(pos); + pos = s.find(": "); + if(pos != npos) + s = s.left_of(pos); + else if(s.ends_with(':')) + s = s.left_of(s.len-1); + _RYML_WITH_TAB_TOKENS( + else if((pos = s.find(":\t")) != npos) // TABS + s = s.left_of(pos); + ) + else + s = s.left_of(s.first_of(',')); + s = s.trim(" \t"); + _c4dbgpf("RUNK: scalar='{}'", s); + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + + +//----------------------------------------------------------------------------- + +csubstr Parser::_extend_scanned_scalar(csubstr s) +{ + if(has_all(RMAP|RKEY|QMRK)) + { + size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col; + _c4dbgpf("extend_scalar: explicit key! 
indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col); + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + substr full = _scan_complex_key(s, n).trimr(" \t\r\n"); + if(full != s) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + // deal with plain (unquoted) scalars that continue to the next line + else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference) + { + _c4dbgpf("extend_scalar: line ended, scalar='{}'", s); + if(has_none(FLOW)) + { + size_t scalar_indentation = m_state->indref + 1; + if(has_all(RUNK) && scalar_indentation == 1) + scalar_indentation = 0; + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n)); + substr full = _scan_plain_scalar_blck(s, n, scalar_indentation); + if(full.len >= s.len) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + csubstr n = _scan_to_next_nonempty_line(/*indentation*/0); + if(!n.empty()) + { + _c4dbgp("rscalar[FLOW]"); + substr full = _scan_plain_scalar_flow(s, n); + s = _filter_plain_scalar(full, /*indentation*/0); + } + } + } + + return s; +} + + +//----------------------------------------------------------------------------- + +substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line) +{ + static constexpr const csubstr chars = "[]{}?#,"; + size_t pos = peeked_line.first_of(chars); + bool first = true; + while(pos != 0) + { + if(has_all(RMAP|RKEY) || has_any(RUNK)) + { + csubstr tpkl = peeked_line.triml(' ').trimr("\r\n"); + if(tpkl.begins_with(": ") || tpkl == ':') + { + _c4dbgpf("rscalar[FLOW]: map value starts on the peeked line: '{}'", peeked_line); + peeked_line = peeked_line.first(0); + break; + } + else + { + auto colon_pos = peeked_line.first_of_any(": ", ":"); + if(colon_pos && colon_pos.pos < pos) + { + peeked_line = peeked_line.first(colon_pos.pos); + _c4dbgpf("rscalar[FLOW]: found colon at {}. 
peeked='{}'", colon_pos.pos, peeked_line); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); + _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); + break; + } + } + } + if(pos != npos) + { + _c4dbgpf("rscalar[FLOW]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n")); + peeked_line = peeked_line.left_of(pos); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin()); + _line_progressed(static_cast(peeked_line.end() - m_state->line_contents.rem.begin())); + break; + } + _c4dbgpf("rscalar[FLOW]: append another line, full: '{}'", peeked_line.trimr("\r\n")); + if(!first) + { + RYML_CHECK(_advance_to_peeked()); + } + peeked_line = _scan_to_next_nonempty_line(/*indentation*/0); + if(peeked_line.empty()) + { + _c4err("expected token or continuation"); + } + pos = peeked_line.first_of(chars); + first = false; + } + substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset); + full = full.trimr("\n\r "); + return full; +} + + +//----------------------------------------------------------------------------- + +substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast(currscalar.end() - m_buf.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation)); + while(true) + { + _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented? + { + if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank? + { + _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n")); + _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation); + csubstr next_peeked = _scan_to_next_nonempty_line(indentation); + if(next_peeked.empty()) + { + _c4dbgp("rscalar[IMPL]: ... finished."); + break; + } + _c4dbgp("rscalar[IMPL]: ... 
continuing."); + peeked_line = next_peeked; + } + + _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n")); + size_t token_pos; + if(peeked_line.find(": ") != npos) + { + _line_progressed(peeked_line.find(": ")); + _c4err("': ' is not a valid token in plain flow (unquoted) scalars"); + } + else if(peeked_line.ends_with(':')) + { + _line_progressed(peeked_line.find(':')); + _c4err("lines cannot end with ':' in plain flow (unquoted) scalars"); + } + else if((token_pos = peeked_line.find(" #")) != npos) + { + _line_progressed(token_pos); + break; + //_c4err("' #' is not a valid token in plain flow (unquoted) scalars"); + } + + _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar[IMPL]: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + full = full.trimr("\r\n "); + return full; +} + +substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast(currscalar.end() - m_buf.begin()); + while(true) + { + _c4dbgp("rcplxkey: continuing..."); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else + { + size_t pos = peeked_line.first_of("?:[]{}"); + if(pos == csubstr::npos) + { + pos = peeked_line.find("- "); + } + if(pos != csubstr::npos) + { + _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n")); + _line_progressed(pos); + break; + } + } + + _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n")); + csubstr next_peeked = _scan_to_next_nonempty_line(0); + if(next_peeked.empty()) + { + _c4dbgp("rcplxkey: empty ... finished."); + break; + } + _c4dbgp("rcplxkey: ... continuing."); + peeked_line = next_peeked; + + _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n")); + size_t colpos; + if((colpos = peeked_line.find(": ")) != npos) + { + _c4dbgp("rcplxkey: found ': ', stopping."); + _line_progressed(colpos); + break; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if((colpos = peeked_line.ends_with(':'))) + { + _c4dbgp("rcplxkey: ends with ':', stopping."); + _line_progressed(colpos); + break; + } + #endif + _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rcplxkey: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + return full; +} + +//! 
scans to the next non-blank line starting with the given indentation +csubstr Parser::_scan_to_next_nonempty_line(size_t indentation) +{ + csubstr next_peeked; + while(true) + { + _c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation); + next_peeked = _peek_next_line(m_state->pos.offset); + csubstr next_peeked_triml = next_peeked.triml(' '); + _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n")); + if(next_peeked_triml.begins_with('#')) + { + _c4dbgp("rscalar: ... first non-space character is #"); + return {}; + } + else if(next_peeked.begins_with(' ', indentation)) + { + _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation); + _advance_to_peeked(); + return next_peeked; + } + else // check for de-indentation + { + csubstr trimmed = next_peeked_triml.trimr("\t\r\n"); + _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed); + if(!trimmed.empty()) + { + _c4dbgp("rscalar: ... and not empty. bailing out."); + return {}; + } + } + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar: file finished"); + return {}; + } + } + return {}; +} + +// returns false when the file finished +bool Parser::_advance_to_peeked() +{ + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos); + _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len); + _scan_line(); // puts the peeked-at line in the buffer + if(_finished_file()) + { + _c4dbgp("rscalar: finished file!"); + return false; + } + return true; +} + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) +{ + return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); +} + +//! look for the next newline chars, and jump to the right of those +csubstr from_next_line(csubstr rem) +{ + size_t nlpos = rem.first_of("\r\n"); + if(nlpos == csubstr::npos) + return {}; + const char nl = rem[nlpos]; + rem = rem.right_of(nlpos); + if(rem.empty()) + return {}; + if(_extend_from_combined_newline(nl, rem.front())) + rem = rem.sub(1); + return rem; +} + +csubstr Parser::_peek_next_line(size_t pos) const +{ + csubstr rem{}; // declare here because of the goto + size_t nlpos{}; // declare here because of the goto + pos = pos == npos ? 
m_state->pos.offset : pos; + if(pos >= m_buf.len) + goto next_is_empty; + + // look for the next newline chars, and jump to the right of those + rem = from_next_line(m_buf.sub(pos)); + if(rem.empty()) + goto next_is_empty; + + // now get everything up to and including the following newline chars + nlpos = rem.first_of("\r\n"); + if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) + nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); + rem = rem.left_of(nlpos, /*include_pos*/true); + + _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); + return rem; + +next_is_empty: + _c4dbgpf("peek next line @ {}: (len=0)''", pos); + return {}; +} + + +//----------------------------------------------------------------------------- +void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset) +{ + RYML_ASSERT(offset <= buf.len); + char const* C4_RESTRICT b = &buf[offset]; + char const* C4_RESTRICT e = b; + // get the current line stripped of newline chars + while(e < buf.end() && (*e != '\n' && *e != '\r')) + ++e; + RYML_ASSERT(e >= b); + const csubstr stripped_ = buf.sub(offset, static_cast(e - b)); + // advance pos to include the first line ending + if(e != buf.end() && *e == '\r') + ++e; + if(e != buf.end() && *e == '\n') + ++e; + RYML_ASSERT(e >= b); + const csubstr full_ = buf.sub(offset, static_cast(e - b)); + reset(full_, stripped_); +} + +void Parser::_scan_line() +{ + if(m_state->pos.offset >= m_buf.len) + { + m_state->line_contents.reset(m_buf.last(0), m_buf.last(0)); + return; + } + m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset); +} + + +//----------------------------------------------------------------------------- +void Parser::_line_progressed(size_t ahead) +{ + _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead); + m_state->pos.offset += ahead; + m_state->pos.col += ahead; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1); + m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead); +} + +void Parser::_line_ended() +{ + _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1); + m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len; + ++m_state->pos.line; + m_state->pos.col = 1; +} + +void Parser::_line_ended_undo() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len); + size_t delta = m_state->line_contents.full.len - m_state->line_contents.stripped.len; + _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - delta); + m_state->pos.offset -= delta; + --m_state->pos.line; + m_state->pos.col = m_state->line_contents.stripped.len + 1u; + // don't forget to undo also the changes to the remainder of the line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_buf.len || m_buf[m_state->pos.offset] == '\n' || m_buf[m_state->pos.offset] == '\r'); + m_state->line_contents.rem = m_buf.sub(m_state->pos.offset, 0); +} + + +//----------------------------------------------------------------------------- +void Parser::_set_indentation(size_t indentation) +{ + m_state->indref = indentation; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +void Parser::_save_indentation(size_t behind) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin()); + m_state->indref = static_cast(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref); + m_state->indref -= behind; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +bool Parser::_maybe_set_indentation_from_anchor_or_tag() +{ + if(m_key_anchor.not_empty()) + { + _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation); + _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts + return true; + } + else if(m_key_tag.not_empty()) + { + _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation); + _set_indentation(m_key_tag_indentation); // this is the column where the tag starts + return true; + } + return false; +} + + +//----------------------------------------------------------------------------- +void Parser::_write_key_anchor(size_t node_id) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id)); + if( ! m_key_anchor.empty()) + { + _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor); + m_tree->set_key_anchor(node_id, m_key_anchor); + m_key_anchor.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + } + else if( ! m_tree->is_key_quoted(node_id)) + { + csubstr r = m_tree->key(node_id); + if(r.begins_with('*')) + { + _c4dbgpf("node={}: set key reference: '{}'", node_id, r); + m_tree->set_key_ref(node_id, r.sub(1)); + } + else if(r == "<<") + { + m_tree->set_key_ref(node_id, r); + _c4dbgpf("node={}: it's an inheriting reference", node_id); + if(m_tree->is_seq(node_id)) + { + _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id)); + for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i)) + { + if( ! (m_tree->val(i).begins_with('*'))) + _c4err("malformed reference: '{}'", m_tree->val(i)); + } + } + else if( ! m_tree->val(node_id).begins_with('*')) + { + _c4err("malformed reference: '{}'", m_tree->val(node_id)); + } + //m_tree->set_key_ref(node_id, r); + } + } +} + +//----------------------------------------------------------------------------- +void Parser::_write_val_anchor(size_t node_id) +{ + if( ! m_val_anchor.empty()) + { + _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor); + m_tree->set_val_anchor(node_id, m_val_anchor); + m_val_anchor.clear(); + } + csubstr r = m_tree->has_val(node_id) ? 
m_tree->val(node_id) : ""; + if(!m_tree->is_val_quoted(node_id) && r.begins_with('*')) + { + _c4dbgpf("node={}: set val reference: '{}'", node_id, r); + RYML_CHECK(!m_tree->has_val_anchor(node_id)); + m_tree->set_val_ref(node_id, r.sub(1)); + } +} + +//----------------------------------------------------------------------------- +void Parser::_push_level(bool explicit_flow_chars) +{ + _c4dbgpf("pushing level! currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); + if(node(m_state) == nullptr) + { + _c4dbgp("pushing level! actually no, current node is null"); + //_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars); + return; + } + flag_t st = RUNK; + if(explicit_flow_chars || has_all(FLOW)) + { + st |= FLOW; + } + m_stack.push_top(); + m_state = &m_stack.top(); + set_flags(st); + m_state->node_id = (size_t)NONE; + m_state->indref = (size_t)NONE; + ++m_state->level; + _c4dbgpf("pushing level: now, currlevel={}", m_state->level); +} + +void Parser::_pop_level() +{ + _c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level); + if(has_any(RMAP) || m_tree->is_map(m_state->node_id)) + { + _stop_map(); + } + if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id)) + { + _stop_seq(); + } + if(m_tree->is_doc(m_state->node_id)) + { + _stop_doc(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); + _prepare_pop(); + m_stack.pop(); + m_state = &m_stack.top(); + /*if(has_any(RMAP)) + { + _toggle_key_val(); + }*/ + if(m_state->line_contents.indentation == 0) + { + //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP)); + add_flags(RTOP); + } + _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level); +} + +//----------------------------------------------------------------------------- +void Parser::_start_unk(bool /*as_child*/) +{ + _c4dbgp("start_unk"); + _push_level(); + _move_scalar_from_top(); +} + +//----------------------------------------------------------------------------- +void Parser::_start_doc(bool as_child) +{ + _c4dbgpf("start_doc (as child={})", as_child); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + _c4dbgpf("start_doc: parent={}", parent_id); + if( ! m_tree->is_stream(parent_id)) + { + _c4dbgp("start_doc: rearranging with root as STREAM"); + m_tree->set_root_as_stream(); + } + m_state->node_id = m_tree->append_child(parent_id); + m_tree->to_doc(m_state->node_id); + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id)); + m_state->node_id = parent_id; + if( ! 
m_tree->is_doc(parent_id)) + { + m_tree->to_doc(parent_id, DOC); + } + } + #endif + _c4dbgpf("start_doc: id={}", m_state->node_id); + add_flags(RUNK|RTOP|NDOC); + _handle_types(); + rem_flags(NDOC); +} + +void Parser::_stop_doc() +{ + size_t doc_node = m_state->node_id; + _c4dbgpf("stop_doc[{}]", doc_node); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node)); + if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node); + m_tree->to_val(doc_node, {}, DOC); + } +} + +void Parser::_end_stream() +{ + _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_stack.empty()); + NodeData *added = nullptr; + if(has_any(SSCL)) + { + if(m_tree->is_seq(m_state->node_id)) + { + _c4dbgp("append val..."); + added = _append_val(_consume_scalar()); + } + else if(m_tree->is_map(m_state->node_id)) + { + _c4dbgp("append null key val..."); + added = _append_key_val_null(m_state->line_contents.rem.str); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(has_any(RSEQIMAP)) + { + _stop_seqimap(); + _pop_level(); + } + #endif + } + else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE) + { + NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar + csubstr scalar = _consume_scalar(); + _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : ""); + m_tree->to_val(m_state->node_id, scalar, DOC|quoted); + added = m_tree->get(m_state->node_id); + } + else + { + _c4err("internal error"); + } + } + else if(has_all(RSEQ|RVAL) && has_none(FLOW)) + { + _c4dbgp("add last..."); + added = _append_val_null(m_state->line_contents.rem.str); + } + else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)) + { + csubstr scalar = m_state->line_contents.rem.first(0); + _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id); + m_tree->to_val(m_state->node_id, scalar, DOC); + added = m_tree->get(m_state->node_id); + } + + if(added) + { + size_t added_id = m_tree->id(added); + if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id)) + { + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor); + m_val_anchor = m_key_anchor; + m_key_anchor = {}; + } + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag); + m_val_tag = m_key_tag; + m_key_tag = {}; + } + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: set key anchor='{}'", added_id, m_key_anchor); + m_tree->set_key_anchor(added_id, m_key_anchor); + m_key_anchor = {}; + } + #endif + if(!m_val_anchor.empty()) + { + _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor); + m_tree->set_val_anchor(added_id, m_val_anchor); + m_val_anchor = {}; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(added_id, normalize_tag(m_key_tag)); + m_key_tag = {}; + } + #endif + if(!m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(added_id, normalize_tag(m_val_tag)); + m_val_tag = {}; + } + } + + 
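+    // Descriptive note (added): the loop below unwinds every remaining parser level back to the
+    // stream root; if a flow sequence is still open at this point, it is reported as "closing ] not found".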
while(m_stack.size() > 1) + { + _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size()); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top())); + if(has_all(RSEQ|FLOW)) + _err("closing ] not found"); + _pop_level(); + } + add_flags(NDOC); +} + +void Parser::_start_new_doc(csubstr rem) +{ + _c4dbgp("_start_new_doc"); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---")); + C4_UNUSED(rem); + + _end_stream(); + + size_t indref = m_state->indref; + _c4dbgpf("start a document, indentation={}", indref); + _line_progressed(3); + _push_level(); + _start_doc(); + _set_indentation(indref); +} + + +//----------------------------------------------------------------------------- +void Parser::_start_map(bool as_child) +{ + _c4dbgpf("start_map (as child={})", as_child); + addrem_flags(RMAP|RVAL, RKEY|RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + type_bits key_quoted = NOTYPE; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_map(m_state->node_id, key, key_quoted); + _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + m_tree->to_map(m_state->node_id); + _c4dbgpf("start_map: id={}", m_state->node_id); + } + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + _write_val_anchor(m_state->node_id); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + m_state->node_id = parent_id; + _c4dbgpf("start_map: id={}", m_state->node_id); + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_map(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_map(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + if(m_key_anchor.not_empty()) + m_key_anchor_was_before = true; + _write_val_anchor(parent_id); + if(m_stack.size() >= 2) + { + State const& parent_state = m_stack.top(1); + if(parent_state.flags & RSET) + add_flags(RSET); + } + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } +} + +void Parser::_start_map_unk(bool as_child) +{ + if(!m_key_anchor_was_before) + { + _c4dbgpf("stash key anchor before starting map... 
'{}'", m_key_anchor); + csubstr ka = m_key_anchor; + m_key_anchor = {}; + _start_map(as_child); + m_key_anchor = ka; + } + else + { + _start_map(as_child); + m_key_anchor_was_before = false; + } + if(m_key_tag2.not_empty()) + { + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; + } +} + +void Parser::_stop_map() +{ + _c4dbgpf("stop_map[{}]", m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); + if(has_all(QMRK|RKEY) && !has_all(SSCL)) + { + _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id); + _store_scalar_null(m_state->line_contents.rem.str); + _append_key_val_null(m_state->line_contents.rem.str); + } +} + + +//----------------------------------------------------------------------------- +void Parser::_start_seq(bool as_child) +{ + _c4dbgpf("start_seq (as child={})", as_child); + if(has_all(RTOP|RUNK)) + { + _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag); + m_val_tag = m_key_tag; + m_key_tag.clear(); + } + addrem_flags(RSEQ|RVAL, RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id)); + type_bits key_quoted = 0; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_seq(m_state->node_id, key, key_quoted); + _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + type_bits as_doc = 0; + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id)); + m_tree->to_seq(m_state->node_id, as_doc); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : ""); + } + _write_val_anchor(m_state->node_id); + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + else + { + m_state->node_id = parent_id; + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_seq(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_seq(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : ""); + _write_val_anchor(parent_id); + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! m_val_tag.empty()) + { + _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } +} + +void Parser::_stop_seq() +{ + _c4dbgp("stop_seq"); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); +} + + +//----------------------------------------------------------------------------- +void Parser::_start_seqimap() +{ + _c4dbgpf("start_seqimap at node={}. 
has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + // create a map, and turn the last scalar of this sequence + // into the key of the map's first child. This scalar was + // understood to be a value in the sequence, but it is + // actually a key of a map, implicitly opened here. + // Eg [val, key: val] + // + // Yep, YAML is crazy. + if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id))) + { + size_t prev = m_tree->last_child(m_state->node_id); + NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes + NodeScalar tmp = m_tree->valsc(prev); + _c4dbgpf("has children and last child={} has val. saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted()); + m_tree->remove(prev); + _push_level(); + _start_map(); + _store_scalar(tmp.scalar, ty.is_val_quoted()); + m_key_anchor = tmp.anchor; + m_key_tag = tmp.tag; + } + else + { + _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id); + _push_level(); + _start_map(); + _store_scalar_null(m_state->line_contents.rem.str); + } + add_flags(RSEQIMAP|FLOW); +} + +void Parser::_stop_seqimap() +{ + _c4dbgp("stop_seqimap"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP)); +} + + +//----------------------------------------------------------------------------- +NodeData* Parser::_append_val(csubstr val, flag_t quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id)); + type_bits additional_flags = quoted ? VALQUO : NOTYPE; + _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : ""); + size_t nid = m_tree->append_child(m_state->node_id); + m_tree->to_val(nid, val, additional_flags); + + _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar); + if( ! m_val_tag.empty()) + { + _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } + _write_val_anchor(nid); + return m_tree->get(nid); +} + +NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); + type_bits additional_flags = 0; + if(m_state->flags & QSCL) + additional_flags |= KEYQUO; + if(val_quoted) + additional_flags |= VALQUO; + + csubstr key = _consume_scalar(); + _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : ""); + size_t nid = m_tree->append_child(m_state->node_id); + m_tree->to_keyval(nid, key, val, additional_flags); + _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid)); + if( ! m_key_tag.empty()) + { + _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(nid, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + if( ! 
m_val_tag.empty()) + { + _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(nid, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } + _write_key_anchor(nid); + _write_val_anchor(nid); + rem_flags(QMRK); + return m_tree->get(nid); +} + + +//----------------------------------------------------------------------------- +void Parser::_store_scalar(csubstr s, flag_t is_quoted) +{ + _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')", + m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar); + RYML_CHECK(has_none(SSCL)); + add_flags(SSCL | (is_quoted * QSCL)); + m_state->scalar = s; +} + +csubstr Parser::_consume_scalar() +{ + _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL); + RYML_CHECK(m_state->flags & SSCL); + csubstr s = m_state->scalar; + rem_flags(SSCL | QSCL); + m_state->scalar.clear(); + return s; +} + +void Parser::_move_scalar_from_top() +{ + if(m_stack.size() < 2) return; + State &prev = m_stack.top(1); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev); + if(prev.flags & SSCL) + { + _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar); + add_flags(prev.flags & (SSCL | QSCL)); + m_state->scalar = prev.scalar; + rem_flags(SSCL | QSCL, &prev); + prev.scalar.clear(); + } +} + +//----------------------------------------------------------------------------- +/** @todo this function is a monster and needs love. Likely, it needs + * to be split like _scan_scalar_*() */ +bool Parser::_handle_indentation() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + if( ! _at_line_begin()) + return false; + + size_t ind = m_state->line_contents.indentation; + csubstr rem = m_state->line_contents.rem; + /** @todo instead of trimming, we should use the indentation index from above */ + csubstr remt = rem.triml(' '); + + if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line + { + _line_progressed(rem.size()); + return true; + } + + _c4dbgpf("indentation? 
ind={} indref={}", ind, m_state->indref); + if(ind == m_state->indref) + { + _c4dbgpf("same indentation: {}", ind); + if(!rem.sub(ind).begins_with('-')) + { + _c4dbgp("does not begin with -"); + if(has_any(RMAP)) + { + if(has_all(SSCL|RVAL)) + { + _c4dbgp("add with null val"); + _append_key_val_null(rem.str + ind - 1); + addrem_flags(RKEY, RVAL); + } + } + else if(has_any(RSEQ)) + { + if(m_stack.size() > 2) // do not pop to root level + { + if(has_any(RNXT)) + { + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + else if(has_any(RVAL)) + { + _c4dbgp("add with null val"); + _append_val_null(rem.str); + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + } + } + } + _line_progressed(ind); + return ind > 0; + } + else if(ind < m_state->indref) + { + _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref); + if(has_all(RVAL)) + { + _c4dbgp("there was an empty val -- appending"); + if(has_all(RMAP)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + _append_key_val_null(rem.sub(ind).str - 1); + } + else if(has_all(RSEQ)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _append_val_null(rem.sub(ind).str - 1); + } + } + // search the stack frame to jump to based on its indentation + State const* popto = nullptr; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous + for(State const* s = m_state-1; s >= m_stack.begin(); --s) + { + _c4dbgpf("searching for state with indentation {}. curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); + if(s->indref == ind) + { + _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); + popto = s; + // while it may be tempting to think we're done at this + // point, we must still determine whether we're jumping to a + // parent with the same indentation. Consider this case with + // an indentless sequence: + // + // product: + // - sku: BL394D + // quantity: 4 + // description: Basketball + // price: 450.00 + // - sku: BL4438H + // quantity: 1 + // description: Super Hoop + // price: 2392.00 # jumping one level here would be wrong. + // tax: 1234.5 # we must jump two levels + if(popto > m_stack.begin()) + { + auto parent = popto - 1; + if(parent->indref == popto->indref) + { + _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref); + _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id)); + if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id)) + { + if( ! 
remt.begins_with('-')) + { + _c4dbgp("this is an indentless sequence"); + popto = parent; + } + else + { + _c4dbgp("not an indentless sequence"); + } + } + } + } + break; + } + } + if(!popto || popto >= m_state || popto->level >= m_state->level) + { + _c4err("parse error: incorrect indentation?"); + } + _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level); + while(m_state != popto) + { + _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref); + _pop_level(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref); + _line_progressed(ind); + return true; + } + else + { + _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref); + if(has_all(RMAP|RVAL)) + { + if(_is_scalar_next__rmap_val(remt) && remt.first_of(":?") == npos) + { + _c4dbgpf("actually it seems a value: '{}'", remt); + } + else + { + addrem_flags(RKEY, RVAL); + _start_unk(); + //_move_scalar_from_top(); + _line_progressed(ind); + _save_indentation(); + return true; + } + } + else if(has_all(RSEQ|RVAL)) + { + // nothing to do here + } + else + { + _c4err("parse error - indentation should not increase at this point"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_comment() +{ + csubstr s = m_state->line_contents.rem; + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#')); + _line_progressed(s.len); + // skip the # character + s = s.sub(1); + // skip leading whitespace + s = s.right_of(s.first_not_of(' '), /*include_pos*/true); + _c4dbgpf("comment was '{}'", s); + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_squot_scalar() +{ + // quoted scalars can spread over multiple lines! + // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_state->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_state->pos.offset; // take this into account + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\'')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_state->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr == '\'') // single quotes are escaped with two single quotes + { + const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + if(next != '\'') // so just look for the first quote + { // without another after it + pos = i; + break; + } + else + { + needs_filter = true; // needs filter to remove escaped quotes + ++i; // skip the escaped quote + } + } + else if(curr != ' ') + { + line_is_blank = false; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\''); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_state->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file while looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\''); + s = s.sub(0, pos-1); + } + + if(needs_filter) + { + csubstr ret = _filter_squot_scalar(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); + _c4dbgpf("final scalar: \"{}\"", ret); + return ret; + } + + _c4dbgpf("final scalar: \"{}\"", s); + + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_dquot_scalar() +{ + // quoted scalars can spread over multiple lines! + // nice explanation here: http://yaml-multiline.info/ + + // a span to the end of the file + size_t b = m_state->pos.offset; + substr s = m_buf.sub(b); + if(s.begins_with(' ')) + { + s = s.triml(' '); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s)); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin()); + _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin())); + } + b = m_state->pos.offset; // take this into account + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"')); + + // skip the opening quote + _line_progressed(1); + s = s.sub(1); + + bool needs_filter = false; + + size_t numlines = 1; // we already have one line + size_t pos = npos; // find the pos of the matching quote + while( ! _finished_file()) + { + const csubstr line = m_state->line_contents.rem; + bool line_is_blank = true; + _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line); + for(size_t i = 0; i < line.len; ++i) + { + const char curr = line.str[i]; + if(curr != ' ') + line_is_blank = false; + // every \ is an escape + if(curr == '\\') + { + const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + needs_filter = true; + if(next == '"' || next == '\\') + ++i; + } + else if(curr == '"') + { + pos = i; + break; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || (numlines > 1) + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"'); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_state->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + s = s.sub(0, pos-1); + } + + if(needs_filter) + { + csubstr ret = _filter_dquot_scalar(s); + _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret); + _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); + return ret; + } + + _c4dbgpf("final scalar: \"{}\"", s); + + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_block() +{ + // nice explanation here: http://yaml-multiline.info/ + csubstr s = m_state->line_contents.rem; + csubstr trimmed = s.triml(' '); + if(trimmed.str > s.str) + { + _c4dbgp("skipping whitespace"); + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str); + _line_progressed(static_cast(trimmed.str - s.str)); + s = trimmed; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); + + _c4dbgpf("scanning block: specs=\"{}\"", s); + + // parse the spec + BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL; + BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used + size_t indentation = npos; // have to find out if no spec is given + csubstr digits; + if(s.len > 1) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>")); + csubstr t = s.sub(1); + _c4dbgpf("scanning block: spec is multichar: '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + size_t pos = t.first_of("-+"); + _c4dbgpf("scanning block: spec chomp char at {}", pos); + if(pos != npos) + { + if(t[pos] == '-') + chomp = CHOMP_STRIP; + else if(t[pos] == '+') + chomp = CHOMP_KEEP; + if(pos == 0) + t = t.sub(1); + else + t = t.first(pos); + } + // from here to the end, only digits are considered + digits = t.left_of(t.first_not_of("0123456789")); + if( ! digits.empty()) + { + if( ! c4::atou(digits, &indentation)) + _c4err("parse error: could not read decimal"); + _c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref); + indentation += m_state->indref; + } + } + + // finish the current line + _line_progressed(s.len); + _line_ended(); + _scan_line(); + + _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? 
"strip" : "keep"), indentation); + + // start with a zero-length block, already pointing at the right place + substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin()); + + // read every full line into a raw block, + // from which newlines are to be stripped as needed. + // + // If no explicit indentation was given, pick it from the first + // non-empty line. See + // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator + size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos; + LineContents lc; + while(( ! _finished_file())) + { + // peek next line, but do not advance immediately + lc.reset_with_next_line(m_buf, m_state->pos.offset); + _c4dbgpf("scanning block: peeking at '{}'", lc.stripped); + // evaluate termination conditions + if(indentation != npos) + { + // stop when the line is deindented and not empty + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty())) + { + _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation); + break; + } + else if(indentation == 0) + { + if((lc.rem == "..." || lc.rem.begins_with("... ")) + || + (lc.rem == "---" || lc.rem.begins_with("--- "))) + { + _c4dbgp("scanning block: stop. indentation=0 and stream ended"); + break; + } + } + } + else + { + _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' ')); + if(lc.stripped.first_not_of(' ') != npos) // non-empty line + { + _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); + if(provisional_indentation == npos) + { + if(lc.indentation < m_state->indref) + { + _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); + if(raw_block.len == 0) + { + _c4dbgp("scanning block: was empty, undo next line"); + _line_ended_undo(); + } + break; + } + else if(lc.indentation == m_state->indref) + { + if(has_any(RSEQ|RMAP)) + { + _c4dbgpf("scanning block: block terminated. reading container and indentation={}==indref={}", lc.indentation, m_state->indref); + break; + } + } + _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation); + indentation = lc.indentation; + } + else + { + if(lc.indentation >= provisional_indentation) + { + _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); + //indentation = provisional_indentation ? provisional_indentation : lc.indentation; + indentation = lc.indentation; + } + else + { + break; + //_c4err("parse error: first non-empty block line should have at least the original indentation"); + } + } + } + else // empty line + { + _c4dbgpf("scanning block: line empty or {} spaces. 
line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); + if(provisional_indentation != npos) + { + if(lc.stripped.len >= provisional_indentation) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); + provisional_indentation = lc.stripped.len; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(lc.indentation >= provisional_indentation && lc.indentation != npos) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); + provisional_indentation = lc.indentation; + } + #endif + } + else + { + provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + if(provisional_indentation == npos) + { + provisional_indentation = lc.stripped.len ? lc.stripped.len : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + } + } + } + } + // advance now that we know the folded scalar continues + m_state->line_contents = lc; + _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem); + raw_block.len += m_state->line_contents.full.len; + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); + ++num_lines; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines) || (raw_block.len == 0)); + C4_UNUSED(num_lines); + C4_UNUSED(first); + + if(indentation == npos) + { + _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation); + indentation = provisional_indentation; + } + + if(num_lines) + _line_ended_undo(); + + _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block); + + // ok! now we strip the newlines and spaces according to the specs + s = _filter_block_scalar(raw_block, newline, chomp, indentation); + + _c4dbgpf("scanning block: final=~~~{}~~~", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +template +bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfnl(...) + #endif + + const char curr = r[*i]; + bool replaced = false; + + _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n'); + + _c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + size_t ii = *i; + size_t numnl_following = count_following_newlines(r, &ii, indentation); + if(numnl_following) + { + _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? "in the middle" : "at the end", ii - *i); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[(*pos)++] = '\n'; + } + else + { + if(r.first_not_of(" \t", *i+1) != npos) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + _c4dbgfnl("last newline, everything else is whitespace. 
ii={}/{}", ii, r.len); + *i = r.len; + } + } + if C4_IF_CONSTEXPR (backslash_is_escape) + { + if(ii < r.len && r.str[ii] == '\\') + { + const char next = ii+1 < r.len ? r.str[ii+1] : '\0'; + if(next == ' ' || next == '\t') + { + _c4dbgfnl("extend skip to backslash{}", ""); + ++ii; + } + } + } + } + *i = ii - 1; // correct for the loop increment + + #undef _c4dbgfnl + + return replaced; +} + + +//----------------------------------------------------------------------------- + +template +void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfws(...) + #endif + + const char curr = r[*i]; + _c4dbgfws("found whitespace '{}'", _c4prc(curr)); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t'); + + size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i); + if(first != npos) + { + if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace + { + _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first); + *i = first - 1; // correct for the loop increment + } + else // a legit whitespace + { + m_filter_arena.str[(*pos)++] = curr; + _c4dbgfws("legit whitespace. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + } + } + else + { + _c4dbgfws("... everything else is trailing whitespace{}", ""); + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + for(size_t j = *i; j < r.len; ++j) + m_filter_arena.str[(*pos)++] = r[j]; + *i = r.len; + } + + #undef _c4dbgfws +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_plain_scalar(substr s, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfps(...) _c4dbgpf("filt_plain_scalar" __VA_ARGS__) + #else + #define _c4dbgfps(...) + #endif + + _c4dbgfps("before=~~~{}~~~", s); + + substr r = s.triml(" \t"); + _grow_filter_arena(r.len); + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfps("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, indentation); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else + { + m_filter_arena.str[pos++] = r[i]; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfps + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_squot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar") + #else + #define _c4dbgfsq(...) 
+ #endif + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted + + _c4dbgfsq(": before=~~~{}~~~", s); + + _grow_filter_arena(s.len); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\'') + { + char next = i+1 < r.len ? r[i+1] : '\0'; + if(next == '\'') + { + _c4dbgfsq("[{}]: two consecutive quotes", i); + filtered_chars = true; + m_filter_arena.str[pos++] = '\''; + ++i; + } + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfsq + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_dquot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__) + #else + #define _c4dbgfdq(...) + #endif + + _c4dbgfdq(": before=~~~{}~~~", s); + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted + // + // All leading and trailing white space characters are excluded + // from the content. Each continuation line must therefore contain + // at least one non-space character. Empty lines, if any, are + // consumed as part of the line folding. + + _grow_filter_arena(s.len + 2u * s.count('\\')); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\\') + { + char next = i+1 < r.len ? 
r[i+1] : '\0'; + _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next)); + filtered_chars = true; + if(next == '\r') + { + if(i+2 < r.len && r[i+2] == '\n') + { + ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented) + next = '\n'; + _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i); + } + } + // remember the loop will also increment i + if(next == '\n') + { + size_t ii = i + 2; + for( ; ii < r.len; ++ii) + { + if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace + ; + else + break; + } + i += ii - i - 1; + } + else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility + { + m_filter_arena.str[pos++] = next; + ++i; + } + else if(next == '\r') + { + //++i; + } + else if(next == 'n') + { + m_filter_arena.str[pos++] = '\n'; + ++i; + } + else if(next == 'r') + { + m_filter_arena.str[pos++] = '\r'; + ++i; // skip + } + else if(next == 't') + { + m_filter_arena.str[pos++] = '\t'; + ++i; + } + else if(next == '\\') + { + m_filter_arena.str[pos++] = '\\'; + ++i; + } + else if(next == 'x') // UTF8 + { + if(i + 1u + 2u >= r.len) + _c4err("\\x requires 2 hex digits"); + uint8_t byteval = {}; + if(!read_hex(r.sub(i + 2u, 2u), &byteval)) + _c4err("failed to read \\x codepoint"); + m_filter_arena.str[pos++] = *(char*)&byteval; + i += 1u + 2u; + } + else if(next == 'u') // UTF16 + { + if(i + 1u + 4u >= r.len) + _c4err("\\u requires 4 hex digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 4u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\u codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 4u; + } + else if(next == 'U') // UTF32 + { + if(i + 1u + 8u >= r.len) + _c4err("\\U requires 8 hex digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 8u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\U codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 8u; + } + // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char + else if(next == '0') + { + m_filter_arena.str[pos++] = '\0'; + ++i; + } + else if(next == 'b') // backspace + { + m_filter_arena.str[pos++] = '\b'; + ++i; + } + else if(next == 'f') // form feed + { + m_filter_arena.str[pos++] = '\f'; + ++i; + } + else if(next == 'a') // bell character + { + m_filter_arena.str[pos++] = '\a'; + ++i; + } + else if(next == 'v') // vertical tab + { + m_filter_arena.str[pos++] = '\v'; + ++i; + } + else if(next == 'e') // escape character + { + m_filter_arena.str[pos++] = '\x1b'; + ++i; + } + else if(next == '_') // unicode non breaking space \u00a0 + { + // https://www.compart.com/en/unicode/U+00a0 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0); + ++i; + } + else if(next == 'N') // unicode next line \u0085 + { + // https://www.compart.com/en/unicode/U+0085 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85); + ++i; + } + else if(next == 'L') // unicode line separator \u2028 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + 
m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8); + ++i; + } + else if(next == 'P') // unicode paragraph separator \u2029 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9); + ++i; + } + _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos)); + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfdq + + return r; +} + + +//----------------------------------------------------------------------------- +bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp) +{ + substr trimmed = buf.first(*pos).trimr('\n'); + bool added_newline = false; + switch(chomp) + { + case CHOMP_KEEP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos); + //m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + break; + case CHOMP_CLIP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos); + m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + else + { + _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1); + *pos = trimmed.len + 1; + } + break; + case CHOMP_STRIP: + _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len); + *pos = trimmed.len; + break; + default: + _c4err("unknown chomp style"); + } + return added_newline; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__) + #else + #define _c4dbgfbl(...) + #endif + + _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); + + if(chomp != CHOMP_KEEP && s.trim(" \n\r").len == 0u) + { + _c4dbgp("filt_block: empty scalar"); + return s.first(0); + } + + substr r = s; + + switch(style) + { + case BLOCK_LITERAL: + { + _c4dbgp("filt_block: style=literal"); + // trim leading whitespace up to indentation + { + size_t numws = r.first_not_of(' '); + if(numws != npos) + { + if(numws > indentation) + r = r.sub(indentation); + else + r = r.sub(numws); + _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r); + } + else + { + if(chomp != CHOMP_KEEP || r.len == 0) + { + _c4dbgfbl(": all spaces {}, return empty", r.len); + return r.first(0); + } + else + { + r[0] = '\n'; + return r.first(1); + } + } + } + _grow_filter_arena(s.len + 2u); // use s.len! 
because we may need to add a newline at the end, so the leading indentation will allow space for that newline + size_t pos = 0; // the filtered size + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos); + if(curr == '\r') + continue; + m_filter_arena.str[pos++] = curr; + if(curr == '\n') + { + _c4dbgfbl("[{}]: found newline", i); + // skip indentation on the next line + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + if(first != npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first) + { + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + --pos; + break; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else if(i+1 == r.len) + { + if(chomp == CHOMP_STRIP) + --pos; + break; + } + } + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos); + _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len); + if(pos < r.len || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + break; + } + case BLOCK_FOLD: + { + _c4dbgp("filt_block: style=fold"); + _grow_filter_arena(r.len + 2); + size_t pos = 0; // the filtered size + bool filtered_chars = false; + bool started = false; + bool is_indented = false; + size_t i = r.first_not_of(' '); + _c4dbgfbl(": first non space at {}", i); + if(i > indentation) + { + is_indented = true; + i = indentation; + } + _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented); + auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){ + _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i) + { + if(r.str[i] == '\r') + continue; + _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i])); + m_filter_arena.str[pos++] = r.str[i]; + } + --i; + }; + for( ; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}'", i, _c4prc(curr)); + if(curr == '\n') + { + filtered_chars = true; + // skip indentation on the next line, and advance over the next non-indented blank lines as well + size_t first_non_whitespace; + size_t numnl_following = (size_t)-1; + while(r[i] == '\n') + { + ++numnl_following; + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + _c4dbgfbl("[{}]: found newline. 
first={} rem.len={}", i, first, rem.len); + if(first != npos) + { + first_non_whitespace = first + i+1; + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r') + ++first_non_whitespace; + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first])); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation); + goto finished_counting_newlines; + } + } + // prepare the next while loop iteration + // by setting i at the next newline after + // an empty line + if(r[first_non_whitespace] == '\n') + i = first_non_whitespace; + else + goto finished_counting_newlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + first_non_whitespace = first + i+1; + if(first) + { + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first); + goto finished_counting_newlines; + } + } + } + else // if(i+1 == r.len) + { + _c4dbgfbl("[{}]: it's the final newline", i); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0); + } + goto end_of_scalar; + } + } + end_of_scalar: + // Write all the trailing newlines. Since we're + // at the end no folding is needed, so write every + // newline (add 1). + _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + break; + finished_counting_newlines: + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t') + ++first_non_whitespace; + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len); + size_t last_newl = r.last_of('\n', first_non_whitespace); + size_t this_indentation = first_non_whitespace - last_newl - 1; + _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1); + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation); + if(!started) + { + _c4dbgfbl("[{}]: #newlines={}. 
write all leading newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + if(this_indentation > indentation) + { + is_indented = true; + _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation); + i = last_newl + indentation; + } + else + { + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace); + } + } + else if(this_indentation == indentation) + { + _c4dbgfbl("[{}]: same indentation", i); + if(!is_indented) + { + if(numnl_following == 0) + { + _c4dbgfbl("[{}]: fold!", i); + m_filter_arena.str[pos++] = ' '; + } + else + { + _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + } + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + else + { + _c4dbgfbl("[{}]: back to ref indentation", i); + is_indented = false; + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else + { + _c4dbgfbl("[{}]: increased indentation.", i); + is_indented = true; + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation); + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else if(curr != '\r') + { + if(curr != '\t') + started = true; + m_filter_arena.str[pos++] = curr; + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos)); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + if(pos < r.len || filtered_chars || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + } + break; + default: + _c4err("unknown block style"); + } + + _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r); + + #undef _c4dbgfbl + + return r; +} + +//----------------------------------------------------------------------------- +size_t Parser::_count_nlines(csubstr src) +{ + return 1 + src.count('\n'); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_directive(csubstr directive_) +{ + csubstr directive = directive_; + if(directive.begins_with("%TAG")) + { + TagDirective td; + _c4dbgpf("%TAG directive: {}", directive_); + directive = directive.sub(4); + if(!directive.begins_with(' ')) + _c4err("malformed tag directive: {}", directive_); + directive = directive.triml(' '); + size_t pos = directive.find(' '); + if(pos == npos) + _c4err("malformed tag directive: {}", directive_); + td.handle = directive.first(pos); + directive = directive.sub(td.handle.len).triml(' '); + pos = directive.find(' '); + if(pos != npos) + directive = directive.first(pos); + td.prefix = directive; + td.next_node_id = m_tree->size(); + if(m_tree->size() > 0) + { + size_t prev = m_tree->size() - 1; + if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev)) + ++td.next_node_id; + } + _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id); + m_tree->add_tag_directive(td); + } + else if(directive.begins_with("%YAML")) + { + _c4dbgpf("%YAML directive! 
ignoring...: {}", directive); + } +} + +//----------------------------------------------------------------------------- +void Parser::set_flags(flag_t f, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64]; + csubstr buf1 = _prfl(buf1_, f); + csubstr buf2 = _prfl(buf2_, s->flags); + _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2); +#endif + s->flags = f; +} + +void Parser::add_flags(flag_t on, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags|on); + _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags |= on; +} + +void Parser::addrem_flags(flag_t on, flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, off); + csubstr buf3 = _prfl(buf3_, s->flags); + csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off))); + _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4); +#endif + s->flags |= on; + s->flags &= ~off; +} + +void Parser::rem_flags(flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, off); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags&(~off)); + _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags &= ~off; +} + +//----------------------------------------------------------------------------- + +csubstr Parser::_prfl(substr buf, flag_t flags) +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl) \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = #fl; \ + if(pos + fltxt.len <= buf.len) \ + memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + } + + _prflag(RTOP); + _prflag(RUNK); + _prflag(RMAP); + _prflag(RSEQ); + _prflag(FLOW); + _prflag(QMRK); + _prflag(RKEY); + _prflag(RVAL); + _prflag(RNXT); + _prflag(SSCL); + _prflag(QSCL); + _prflag(RSET); + _prflag(NDOC); + _prflag(RSEQIMAP); + + #undef _prflag + + RYML_ASSERT(pos <= buf.len); + + return buf.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void Parser::_grow_filter_arena(size_t num_characters_needed) +{ + _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed); + if(num_characters_needed <= m_filter_arena.len) + return; + size_t sz = m_filter_arena.len << 1; + _c4dbgpf("grow: sz={}", sz); + sz = num_characters_needed > sz ? num_characters_needed : sz; + _c4dbgpf("grow: sz={}", sz); + sz = sz < 128u ? 
128u : sz; + _c4dbgpf("grow: sz={}", sz); + _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed); + _resize_filter_arena(sz); +} + +void Parser::_resize_filter_arena(size_t num_characters) +{ + if(num_characters > m_filter_arena.len) + { + _c4dbgpf("resize: sz={}", num_characters); + char *prev = m_filter_arena.str; + if(m_filter_arena.str) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0); + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + } + m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev); + m_filter_arena.len = num_characters; + } +} + +substr Parser::_finish_filter_arena(substr dst, size_t pos) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len); + memcpy(dst.str, m_filter_arena.str, pos); + return dst.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +csubstr Parser::location_contents(Location const& loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len); + return m_buf.sub(loc.offset); +} + +Location Parser::location(ConstNodeRef node) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid()); + return location(*node.tree(), node.id()); +} + +Location Parser::location(Tree const& tree, size_t node) const +{ + // try hard to avoid getting the location from a null string. + Location loc; + if(_location_from_node(tree, node, &loc, 0)) + return loc; + return val_location(m_buf.str); +} + +bool Parser::_location_from_node(Tree const& tree, size_t node, Location *C4_RESTRICT loc, size_t level) const +{ + if(tree.has_key(node)) + { + csubstr k = tree.key(node); + if(C4_LIKELY(k.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, k.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(k)); + *loc = val_location(k.str); + return true; + } + } + + if(tree.has_val(node)) + { + csubstr v = tree.val(node); + if(C4_LIKELY(v.str != nullptr)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, v.is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(v)); + *loc = val_location(v.str); + return true; + } + } + + if(tree.is_container(node)) + { + if(_location_from_cont(tree, node, loc)) + return true; + } + + if(tree.type(node) != NOTYPE && level == 0) + { + // try the prev sibling + { + const size_t prev = tree.prev_sibling(node); + if(prev != NONE) + { + if(_location_from_node(tree, prev, loc, level+1)) + return true; + } + } + // try the next sibling + { + const size_t next = tree.next_sibling(node); + if(next != NONE) + { + if(_location_from_node(tree, next, loc, level+1)) + return true; + } + } + // try the parent + { + const size_t parent = tree.parent(node); + if(parent != NONE) + { + if(_location_from_node(tree, parent, loc, level+1)) + return true; + } + } + } + + return false; +} + +bool Parser::_location_from_cont(Tree const& tree, size_t node, Location *C4_RESTRICT loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, tree.is_container(node)); + if(!tree.is_stream(node)) + { + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) + { + size_t child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the container was set after the key + csubstr k = 
tree.key(child); + if(k.str && node_start > k.str) + node_start = k.str; + } + } + *loc = val_location(node_start); + return true; + } + else // it's a stream + { + *loc = val_location(m_buf.str); // just return the front of the buffer + } + return true; +} + + +Location Parser::val_location(const char *val) const +{ + if(C4_UNLIKELY(val == nullptr)) + return {m_file, 0, 0, 0}; + + _RYML_CB_CHECK(m_stack.m_callbacks, m_options.locations()); + // NOTE: if any of these checks fails, the parser needs to be + // instantiated with locations enabled. + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_options.locations()); + _RYML_CB_ASSERT(m_stack.m_callbacks, !_locations_dirty()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); + // NOTE: the pointer needs to belong to the buffer that was used to parse. + csubstr src = m_buf; + _RYML_CB_CHECK(m_stack.m_callbacks, val != nullptr || src.str == nullptr); + _RYML_CB_CHECK(m_stack.m_callbacks, (val >= src.begin() && val <= src.end()) || (src.str == nullptr && val == nullptr)); + // ok. search the first stored newline after the given ptr + using lineptr_type = size_t const* C4_RESTRICT; + lineptr_type lineptr = nullptr; + size_t offset = (size_t)(val - src.begin()); + if(m_newline_offsets_size < 30) // TODO magic number + { + // just do a linear search if the size is small. + for(lineptr_type curr = m_newline_offsets, last = m_newline_offsets + m_newline_offsets_size; curr < last; ++curr) + { + if(*curr > offset) + { + lineptr = curr; + break; + } + } + } + else + { + // do a bisection search if the size is not small. + // + // We could use std::lower_bound but this is simple enough and + // spares the include of . 
+ size_t count = m_newline_offsets_size; + size_t step; + lineptr_type it; + lineptr = m_newline_offsets; + while(count) + { + step = count >> 1; + it = lineptr + step; + if(*it < offset) + { + lineptr = ++it; + count -= step + 1; + } + else + { + count = step; + } + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr >= m_newline_offsets); + _RYML_CB_ASSERT(m_stack.m_callbacks, lineptr <= m_newline_offsets + m_newline_offsets_size); + _RYML_CB_ASSERT(m_stack.m_callbacks, *lineptr > offset); + Location loc; + loc.name = m_file; + loc.offset = offset; + loc.line = (size_t)(lineptr - m_newline_offsets); + if(lineptr > m_newline_offsets) + loc.col = (offset - *(lineptr-1) - 1u); + else + loc.col = offset; + return loc; +} + +void Parser::_prepare_locations() +{ + m_newline_offsets_buf = m_buf; + size_t numnewlines = 1u + m_buf.count('\n'); + _resize_locations(numnewlines); + m_newline_offsets_size = 0; + for(size_t i = 0; i < m_buf.len; i++) + if(m_buf[i] == '\n') + m_newline_offsets[m_newline_offsets_size++] = i; + m_newline_offsets[m_newline_offsets_size++] = m_buf.len; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); +} + +void Parser::_resize_locations(size_t numnewlines) +{ + if(numnewlines > m_newline_offsets_capacity) + { + if(m_newline_offsets) + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); + m_newline_offsets_capacity = numnewlines; + } +} + +bool Parser::_locations_dirty() const +{ + return !m_newline_offsets_size; +} + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#elif defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/node.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + + +namespace c4 { +namespace yml { + + + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_key(encoded); + return encoded.len; +} + +size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_val(encoded); + return encoded.len; +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp) + + + 
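//--------------------------------------------------------------------------------
// Illustrative usage sketch (not part of the upstream rapidyaml sources) for the
// location-tracking API implemented in parse.cpp above. It is wrapped in `#if 0`,
// following the debugging-scaffold convention used elsewhere in this file, so it
// never affects compilation of the amalgamated header. It assumes that location
// tracking is enabled through the parser options object queried by val_location()
// above (spelled here as ParserOptions::locations(true)), and it uses only types
// visible in this file: Parser, Tree, Location.
#if 0
#include <cstdio>
static void show_node_location()
{
    // build a parser that records newline offsets so that location() can resolve
    // a node pointer back to a line/column in the source buffer
    c4::yml::Parser parser(c4::yml::ParserOptions().locations(true));
    c4::yml::Tree tree = parser.parse_in_arena("example.yml", "foo: 1\nbar: [x, y]\n");
    // Location carries name/offset/line/col, as filled in by val_location() above
    c4::yml::Location loc = parser.location(tree["bar"][1]);
    std::printf("%.*s:%zu:%zu\n", (int)loc.name.len, loc.name.str, loc.line, loc.col);
}
#endif
//--------------------------------------------------------------------------------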
+//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/preprocess.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PREPROCESS_HPP_ +#define _C4_YML_PREPROCESS_HPP_ + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +/** @defgroup Preprocessors Preprocessor functions + * + * These are the existing preprocessors: + * + * @code{.cpp} + * size_t preprocess_json(csubstr json, substr buf) + * size_t preprocess_rxmap(csubstr json, substr buf) + * @endcode + */ + +#ifndef _C4_YML_COMMON_HPP_ +//included above: +//#include "./common.hpp" +#endif +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + + + +namespace c4 { +namespace yml { + +namespace detail { +using Preprocessor = size_t(csubstr, substr); +template +substr preprocess_into_container(csubstr input, CharContainer *out) +{ + // try to write once. the preprocessor will stop writing at the end of + // the container, but will process all the input to determine the + // required container size. + size_t sz = PP(input, to_substr(*out)); + // if the container size is not enough, resize, and run again in the + // resized container + if(sz > out->size()) + { + out->resize(sz); + sz = PP(input, to_substr(*out)); + } + return to_substr(*out).first(sz); +} +} // namespace detail + + +//----------------------------------------------------------------------------- + +/** @name preprocess_rxmap + * Convert flow-type relaxed maps (with implicit bools) into strict YAML + * flow map. + * + * @code{.yaml} + * {a, b, c, d: [e, f], g: {a, b}} + * # is converted into this: + * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}} + * @endcode + + * @note this is NOT recursive - conversion happens only in the top-level map + * @param rxmap A relaxed map + * @param buf output buffer + * @param out output container + */ + +//@{ + +/** Write into a given output buffer. This function is safe to call with + * empty or small buffers; it won't write beyond the end of the buffer. + * + * @return the number of characters required for output + */ +RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf); + + +/** Write into an existing container. It is resized to contained the output. + * @return a substr of the container + * @overload preprocess_rxmap */ +template +substr preprocess_rxmap(csubstr rxmap, CharContainer *out) +{ + return detail::preprocess_into_container(rxmap, out); +} + + +/** Create a container with the result. 
+ * @overload preprocess_rxmap */ +template +CharContainer preprocess_rxmap(csubstr rxmap) +{ + CharContainer out; + preprocess_rxmap(rxmap, &out); + return out; +} + +//@} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PREPROCESS_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/preprocess.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//#include "c4/yml/preprocess.hpp" +#if !defined(C4_YML_PREPROCESS_HPP_) && !defined(_C4_YML_PREPROCESS_HPP_) +#error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point" +#endif /* C4_YML_PREPROCESS_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +namespace c4 { +namespace yml { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace { +C4_ALWAYS_INLINE bool _is_idchar(char c) +{ + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || (c == '_' || c == '-' || c == '~' || c == '$'); +} + +typedef enum { kReadPending = 0, kKeyPending = 1, kValPending = 2 } _ppstate; +C4_ALWAYS_INLINE _ppstate _next(_ppstate s) +{ + int n = (int)s + 1; + return (_ppstate)(n <= (int)kValPending ? n : 0); +} +} // empty namespace + + +//----------------------------------------------------------------------------- + +size_t preprocess_rxmap(csubstr s, substr buf) +{ + detail::_SubstrWriter writer(buf); + _ppstate state = kReadPending; + size_t last = 0; + + if(s.begins_with('{')) + { + RYML_CHECK(s.ends_with('}')); + s = s.offs(1, 1); + } + + writer.append('{'); + + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s[i]; + const char next = i+1 < s.len ? 
s[i+1] : '\0'; + + if(curr == '\'' || curr == '"') + { + csubstr ss = s.sub(i).pair_range_esc(curr, '\\'); + i += static_cast(ss.end() - (s.str + i)); + state = _next(state); + } + else if(state == kReadPending && _is_idchar(curr)) + { + state = _next(state); + } + + switch(state) + { + case kKeyPending: + { + if(curr == ':' && next == ' ') + { + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + writer.append(s.range(last, i)); + writer.append(": 1, "); + last = i + 2; + } + break; + } + case kValPending: + { + if(curr == '[' || curr == '{' || curr == '(') + { + csubstr ss = s.sub(i).pair_range_nested(curr, '\\'); + i += static_cast(ss.end() - (s.str + i)); + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + state = _next(state); + } + break; + } + default: + // nothing to do + break; + } + } + + writer.append(s.sub(last)); + if(state == kKeyPending) + writer.append(": 1"); + writer.append('}'); + + return writer.pos; +} + + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/checks.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/checks.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_DETAIL_CHECKS_HPP_ +#define C4_YML_DETAIL_CHECKS_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + + +#ifdef __clang__ +# pragma clang diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // error: comparison of unsigned expression >= 0 is always true +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + + +void check_invariants(Tree const& t, size_t node=NONE); +void check_free_list(Tree const& t); +void check_arena(Tree const& t); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_invariants(Tree const& t, size_t node) +{ + if(node == NONE) + { + if(t.size() == 0) return; + node = t.root_id(); + } + + auto const& n = *t._p(node); +#ifdef RYML_DBG + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + printf("check(%zu): fc=%zu lc=%zu\n", node, n.m_first_child, n.m_last_child); + } + else + { + printf("check(%zu)\n", node); + } +#endif + + C4_CHECK(n.m_parent != node); + if(n.m_parent == NONE) + { + C4_CHECK(t.is_root(node)); + } + else //if(n.m_parent != NONE) + { + C4_CHECK(t.has_child(n.m_parent, node)); + + auto const& p = *t._p(n.m_parent); + if(n.m_prev_sibling == NONE) + { + C4_CHECK(p.m_first_child == node); + C4_CHECK(t.first_sibling(node) == node); + } + else + { + C4_CHECK(p.m_first_child != node); + C4_CHECK(t.first_sibling(node) != node); + } + + 
if(n.m_next_sibling == NONE) + { + C4_CHECK(p.m_last_child == node); + C4_CHECK(t.last_sibling(node) == node); + } + else + { + C4_CHECK(p.m_last_child != node); + C4_CHECK(t.last_sibling(node) != node); + } + } + + C4_CHECK(n.m_first_child != node); + C4_CHECK(n.m_last_child != node); + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + C4_CHECK(n.m_first_child != NONE); + C4_CHECK(n.m_last_child != NONE); + } + + C4_CHECK(n.m_prev_sibling != node); + C4_CHECK(n.m_next_sibling != node); + if(n.m_prev_sibling != NONE) + { + C4_CHECK(t._p(n.m_prev_sibling)->m_next_sibling == node); + C4_CHECK(t._p(n.m_prev_sibling)->m_prev_sibling != node); + } + if(n.m_next_sibling != NONE) + { + C4_CHECK(t._p(n.m_next_sibling)->m_prev_sibling == node); + C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node); + } + + size_t count = 0; + for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i)) + { +#ifdef RYML_DBG + printf("check(%zu): descend to child[%zu]=%zu\n", node, count, i); +#endif + auto const& ch = *t._p(i); + C4_CHECK(ch.m_parent == node); + C4_CHECK(ch.m_next_sibling != i); + ++count; + } + C4_CHECK(count == t.num_children(node)); + + if(n.m_prev_sibling == NONE && n.m_next_sibling == NONE) + { + if(n.m_parent != NONE) + { + C4_CHECK(t.num_children(n.m_parent) == 1); + C4_CHECK(t.num_siblings(node) == 1); + } + } + + if(node == t.root_id()) + { + C4_CHECK(t.size() == t.m_size); + C4_CHECK(t.capacity() == t.m_cap); + C4_CHECK(t.m_cap == t.m_size + t.slack()); + check_free_list(t); + check_arena(t); + } + + for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i)) + { + check_invariants(t, i); + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_free_list(Tree const& t) +{ + if(t.m_free_head == NONE) + { + C4_CHECK(t.m_free_tail == t.m_free_head); + return; + } + + C4_CHECK(t.m_free_head >= 0 && t.m_free_head < t.m_cap); + C4_CHECK(t.m_free_tail >= 0 && t.m_free_tail < t.m_cap); + + auto const& head = *t._p(t.m_free_head); + //auto const& tail = *t._p(t.m_free_tail); + + //C4_CHECK(head.m_prev_sibling == NONE); + //C4_CHECK(tail.m_next_sibling == NONE); + + size_t count = 0; + for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = t._p(i)->m_next_sibling) + { + auto const& elm = *t._p(i); + if(&elm != &head) + { + C4_CHECK(elm.m_prev_sibling == prev); + } + prev = i; + ++count; + } + C4_CHECK(count == t.slack()); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_arena(Tree const& t) +{ + C4_CHECK(t.m_arena.len == 0 || (t.m_arena_pos >= 0 && t.m_arena_pos <= t.m_arena.len)); + C4_CHECK(t.arena_size() == t.m_arena_pos); + C4_CHECK(t.arena_slack() + t.m_arena_pos == t.m_arena.len); +} + + +} /* namespace yml */ +} /* namespace c4 */ + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* C4_YML_DETAIL_CHECKS_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/checks.hpp) + + + +//******************************************************************************** 
+//--------------------------------------------------------------------------------
+// src/c4/yml/detail/print.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef C4_YML_DETAIL_PRINT_HPP_
+#define C4_YML_DETAIL_PRINT_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+
+
+namespace c4 {
+namespace yml {
+
+
+inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children)
+{
+    printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void*)p.get(node));
+    if(p.is_root(node))
+    {
+        printf(" [ROOT]");
+    }
+    printf(" %s:", p.type_str(node));
+    if(p.has_key(node))
+    {
+        if(p.has_key_anchor(node))
+        {
+            csubstr ka = p.key_anchor(node);
+            printf(" &%.*s", (int)ka.len, ka.str);
+        }
+        if(p.has_key_tag(node))
+        {
+            csubstr kt = p.key_tag(node);
+            csubstr k = p.key(node);
+            printf(" %.*s '%.*s'", (int)kt.len, kt.str, (int)k.len, k.str);
+        }
+        else
+        {
+            csubstr k = p.key(node);
+            printf(" '%.*s'", (int)k.len, k.str);
+        }
+    }
+    else
+    {
+        RYML_ASSERT( ! p.has_key_tag(node));
+    }
+    if(p.has_val(node))
+    {
+        if(p.has_val_tag(node))
+        {
+            csubstr vt = p.val_tag(node);
+            csubstr v = p.val(node);
+            printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str);
+        }
+        else
+        {
+            csubstr v = p.val(node);
+            printf(" '%.*s'", (int)v.len, v.str);
+        }
+    }
+    else
+    {
+        if(p.has_val_tag(node))
+        {
+            csubstr vt = p.val_tag(node);
+            printf(" %.*s", (int)vt.len, vt.str);
+        }
+    }
+    if(p.has_val_anchor(node))
+    {
+        auto &a = p.val_anchor(node);
+        printf(" valanchor='&%.*s'", (int)a.len, a.str);
+    }
+    printf(" (%zd sibs)", p.num_siblings(node));
+
+    ++count;
+
+    if(p.is_container(node))
+    {
+        printf(" %zd children:\n", p.num_children(node));
+        if(print_children)
+        {
+            for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i))
+            {
+                count = print_node(p, i, level+1, count, print_children);
+            }
+        }
+    }
+    else
+    {
+        printf("\n");
+    }
+
+    return count;
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+inline void print_node(ConstNodeRef const& p, int level=0)
+{
+    print_node(*p.tree(), p.id(), level, 0, true);
+}
+
+
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+//-----------------------------------------------------------------------------
+
+inline size_t print_tree(Tree const& p, size_t node=NONE)
+{
+    printf("--------------------------------------\n");
+    size_t ret = 0;
+    if(!p.empty())
+    {
+        if(node == NONE)
+            node = p.root_id();
+        ret = print_node(p, node, 0, 0, true);
+    }
+    printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret);
+    printf("--------------------------------------\n");
+    return ret;
+}
+
+
+} /* namespace yml */
+} /* namespace c4 */
+
+
+#endif /* C4_YML_DETAIL_PRINT_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/c4/yml/yml.hpp
+// https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _C4_YML_YML_HPP_
+#define _C4_YML_YML_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp
+//#include "c4/yml/tree.hpp"
+#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_)
+#error "amalgamate: file c4/yml/tree.hpp must have been included at this point"
+#endif /* C4_YML_TREE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp
+//#include "c4/yml/node.hpp"
+#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_)
+#error "amalgamate: file c4/yml/node.hpp must have been included at this point"
+#endif /* C4_YML_NODE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp
+//#include "c4/yml/emit.hpp"
+#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_)
+#error "amalgamate: file c4/yml/emit.hpp must have been included at this point"
+#endif /* C4_YML_EMIT_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp
+//#include "c4/yml/parse.hpp"
+#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_)
+#error "amalgamate: file c4/yml/parse.hpp must have been included at this point"
+#endif /* C4_YML_PARSE_HPP_ */
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp
+//#include "c4/yml/preprocess.hpp"
+#if !defined(C4_YML_PREPROCESS_HPP_) && !defined(_C4_YML_PREPROCESS_HPP_)
+#error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point"
+#endif /* C4_YML_PREPROCESS_HPP_ */
+
+
+#endif // _C4_YML_YML_HPP_
+
+
+// (end https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp)
+
+
+
+//********************************************************************************
+//--------------------------------------------------------------------------------
+// src/ryml.hpp
+// https://github.com/biojppm/rapidyaml/src/ryml.hpp
+//--------------------------------------------------------------------------------
+//********************************************************************************
+
+#ifndef _RYML_HPP_
+#define _RYML_HPP_
+
+// amalgamate: removed include of
+// https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp
+//#include "c4/yml/yml.hpp"
+#if !defined(C4_YML_YML_HPP_) && !defined(_C4_YML_YML_HPP_)
+#error "amalgamate: file c4/yml/yml.hpp must have been included at this point"
+#endif /* C4_YML_YML_HPP_ */
+
+
+namespace ryml {
+using namespace c4::yml;
+using namespace c4;
+}
+
+#endif /* _RYML_HPP_ */
+
+
+// (end https://github.com/biojppm/rapidyaml/src/ryml.hpp)
+
+#endif /* _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ */
+
diff --git a/src/3rd_party/simdjson/simdjson.cpp b/src/3rd_party/simdjson/simdjson.cpp
new file mode 100644
index 00000000..5e8e8b8f
--- /dev/null
+++ b/src/3rd_party/simdjson/simdjson.cpp
@@ -0,0 +1,43566 @@ +/* auto-generated on 2023-08-18 14:37:10 -0400. Do not edit! */ +/* including simdjson.cpp: */ +/* begin file simdjson.cpp */ +#define SIMDJSON_SRC_SIMDJSON_CPP + +/* including base.h: #include */ +/* begin file base.h */ +#ifndef SIMDJSON_SRC_BASE_H +#define SIMDJSON_SRC_BASE_H + +/* including simdjson/base.h: #include */ +/* begin file simdjson/base.h */ +/** + * @file Base declarations for all simdjson headers + * @private + */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* including simdjson/common_defs.h: #include "simdjson/common_defs.h" */ +/* begin file simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* including simdjson/compiler_check.h: #include "simdjson/compiler_check.h" */ +/* begin file simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file simdjson/compiler_check.h */ +/* including simdjson/portability.h: #include "simdjson/portability.h" */ +/* begin file simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if defined(__x86_64__) || defined(_M_AMD64) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#if defined(__ALTIVEC__) +#define SIMDJSON_IS_PPC64_VMX 1 +#endif // defined(__ALTIVEC__) +#else +#define SIMDJSON_IS_32BITS 1 + +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif + +#if SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ +compiling for a known 64-bit platform. All fast kernels \ +will be disabled and performance may be poor. 
Please \ +use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +#define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ +#define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) +#define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. + +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. 
+// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, +// then do away with asserts and use __assume. +#if SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// This should only ever be enabled in debug mode. +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file simdjson/portability.h */ + +namespace simdjson { +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if SIMDJSON_CLANG_VISUAL_STUDIO + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused) + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS + + + +#endif // MSC_VER + +#if 
defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. 
+ +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* including simdjson/nonstd/string_view.hpp: #include "simdjson/nonstd/string_view.hpp" */ +/* begin file simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 7 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if defined(_MSC_VER) +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++23 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202002L ) +#define nssv_CPP23_OR_GREATER ( nssv_CPLUSPLUS >= 202300L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( std::basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view +to_string_view( std::basic_string const & s ) +{ + return std::basic_string_view( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 
_MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_IS_DEFAULT nssv_CPP11_140 +#define nssv_HAVE_IS_DELETE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! 
( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? ) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? ) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional includes: + +#include +#include +#include +#include +#include // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + +#if ! 
nssv_CONFIG_NO_EXCEPTIONS +# include +#endif + +#if nssv_CPP11_OR_GREATER +# include +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narow +// - C26481: gsl::b.1 : don't use pointer arithmetic. Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +// +// basic_string_view declaration: +// + +template +< + class CharT, + class Traits = std::char_traits +> +class basic_string_view; + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +#if nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make search() non-recursive: + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return haystack.starts_with( needle ) ? haystack.begin() : + haystack.empty() ? haystack.end() : search( haystack.substr(1), needle ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return std::search( haystack.begin(), haystack.end(), needle.begin(), needle.end() ); +} + +#endif // OPTIMIZE +#endif // nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + +} // namespace detail + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + +#if nssv_HAVE_NULLPTR +# if nssv_HAVE_IS_DELETE + nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept = delete; +# else + private: nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept; public: +# endif +#endif + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() 
const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? npos : to_pos( +#if nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + detail::search( substr(pos), v ) +#else + std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) +#endif + ); + } + + nssv_constexpr size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? 
size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? 
find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it ) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, 
class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs 
) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < 
CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !sentry ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, 
size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. 
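// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored header). The note just
// above says the hash of a string view equals the hash of the corresponding
// string; the std::hash specializations below achieve this by materializing
// a temporary std::string. A minimal check, assuming the vendored header is
// reachable on the include path (the include path here is hypothetical; the
// file is vendored above as simdjson/nonstd/string_view.hpp):
#include "nonstd/string_view.hpp"
#include <cassert>
#include <functional>
#include <string>

int main() {
  std::string s = "amalgam";
  nonstd::string_view v(s.data(), s.size());
  assert(std::hash<nonstd::string_view>{}(v) == std::hash<std::string>{}(s));
}
// ---------------------------------------------------------------------------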
+ +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. 
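// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored sources). A self-contained
// sketch of how an error-code macro such as SIMDJSON_TRY (defined above)
// propagates failures without exceptions. The enum and the step_* helpers are
// hypothetical stand-ins, not simdjson API; the only property that matters is
// that SUCCESS is the sole falsy value, as the error_code documentation below
// describes.
#include <cstdio>

namespace try_macro_demo {

enum error_code { SUCCESS = 0, TAPE_ERROR };

// Mirrors the SIMDJSON_TRY definition above, renamed to avoid redefinition.
#define DEMO_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } }

error_code step_one() { return SUCCESS; }     // succeeds
error_code step_two() { return TAPE_ERROR; }  // fails

error_code run_both() {
  DEMO_TRY( step_one() );  // SUCCESS (0) is falsy, so execution continues
  DEMO_TRY( step_two() );  // TAPE_ERROR is truthy, so run_both() returns here
  return SUCCESS;
}

} // namespace try_macro_demo

int main() { std::printf("%d\n", try_macro_demo::run_both()); } // prints 1
// ---------------------------------------------------------------------------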
+ +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file simdjson/common_defs.h */ +/* skipped duplicate #include "simdjson/compiler_check.h" */ +/* including simdjson/error.h: #include "simdjson/error.h" */ +/* begin file simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. + */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. 
+ UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). 
+ */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new successful result. + */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
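// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored sources). A minimal sketch
// of the simdjson_result_base idea documented above: a value/error pair where
// get() moves the value out and reports the error so callers can branch
// without exceptions. The names mini_result and demo_error are hypothetical,
// not simdjson API, and this omits the exception-throwing accessors.
#include <cstdio>
#include <string>
#include <utility>

enum demo_error { DEMO_SUCCESS = 0, DEMO_FAILURE };

template <typename T>
struct mini_result : protected std::pair<T, demo_error> {
  mini_result(T &&value) : std::pair<T, demo_error>(std::move(value), DEMO_SUCCESS) {}
  mini_result(demo_error error) : std::pair<T, demo_error>(T{}, error) {}

  demo_error error() const { return this->second; }

  // Move the value into `out`; the caller branches on the returned error.
  demo_error get(T &out) {
    if (this->second == DEMO_SUCCESS) { out = std::move(this->first); }
    return this->second;
  }
};

int main() {
  mini_result<std::string> ok(std::string("parsed"));
  std::string value;
  if (ok.get(value)) { std::printf("error\n"); }
  else               { std::printf("value: %s\n", value.c_str()); } // "value: parsed"
}
// ---------------------------------------------------------------------------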
+ */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. + */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file simdjson/error.h */ +/* skipped duplicate #include "simdjson/portability.h" */ + +/** + * @brief The top level simdjson namespace, containing everything the library provides. + */ +namespace simdjson { + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. + * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). + */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +class implementation; +struct padded_string; +class padded_string_view; +enum class stage1_mode; + +namespace internal { + +template +class atomic_ptr; +class dom_parser_implementation; +class escape_json_string; +class tape_ref; +struct value128; +enum class tape_type; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_BASE_H +/* end file simdjson/base.h */ + +#endif // SIMDJSON_SRC_BASE_H +/* end file base.h */ + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/* including to_chars.cpp: #include */ +/* begin file to_chars.cpp */ +#ifndef SIMDJSON_SRC_TO_CHARS_CPP +#define SIMDJSON_SRC_TO_CHARS_CPP + +/* skipped duplicate #include */ + +#include +#include +#include +#include + +namespace simdjson { +namespace internal { +/*! +implements the Grisu2 algorithm for binary to decimal floating-point +conversion. 
+Adapted from JSON for Modern C++ + +This implementation is a slightly modified version of the reference +implementation which may be obtained from +http://florian.loitsch.com/publications (bench.tar.gz). +The code is distributed under the MIT license, Copyright (c) 2009 Florian +Loitsch. For a detailed description of the algorithm see: [1] Loitsch, "Printing +Floating-Point Numbers Quickly and Accurately with Integers", Proceedings of the +ACM SIGPLAN 2010 Conference on Programming Language Design and Implementation, +PLDI 2010 [2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and +Accurately", Proceedings of the ACM SIGPLAN 1996 Conference on Programming +Language Design and Implementation, PLDI 1996 +*/ +namespace dtoa_impl { + +template +Target reinterpret_bits(const Source source) { + static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); + + Target target; + std::memcpy(&target, &source, sizeof(Source)); + return target; +} + +struct diyfp // f * 2^e +{ + static constexpr int kPrecision = 64; // = q + + std::uint64_t f = 0; + int e = 0; + + constexpr diyfp(std::uint64_t f_, int e_) noexcept : f(f_), e(e_) {} + + /*! + @brief returns x - y + @pre x.e == y.e and x.f >= y.f + */ + static diyfp sub(const diyfp &x, const diyfp &y) noexcept { + + return {x.f - y.f, x.e}; + } + + /*! + @brief returns x * y + @note The result is rounded. (Only the upper q bits are returned.) + */ + static diyfp mul(const diyfp &x, const diyfp &y) noexcept { + static_assert(kPrecision == 64, "internal error"); + + // Computes: + // f = round((x.f * y.f) / 2^q) + // e = x.e + y.e + q + + // Emulate the 64-bit * 64-bit multiplication: + // + // p = u * v + // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) + // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + + // 2^64 (u_hi v_hi ) = (p0 ) + 2^32 ((p1 ) + (p2 )) + // + 2^64 (p3 ) = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + + // 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) = + // (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + + // p2_hi + p3) = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) = (p0_lo ) + + // 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) + // + // (Since Q might be larger than 2^32 - 1) + // + // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) + // + // (Q_hi + H does not overflow a 64-bit int) + // + // = p_lo + 2^64 p_hi + + const std::uint64_t u_lo = x.f & 0xFFFFFFFFu; + const std::uint64_t u_hi = x.f >> 32u; + const std::uint64_t v_lo = y.f & 0xFFFFFFFFu; + const std::uint64_t v_hi = y.f >> 32u; + + const std::uint64_t p0 = u_lo * v_lo; + const std::uint64_t p1 = u_lo * v_hi; + const std::uint64_t p2 = u_hi * v_lo; + const std::uint64_t p3 = u_hi * v_hi; + + const std::uint64_t p0_hi = p0 >> 32u; + const std::uint64_t p1_lo = p1 & 0xFFFFFFFFu; + const std::uint64_t p1_hi = p1 >> 32u; + const std::uint64_t p2_lo = p2 & 0xFFFFFFFFu; + const std::uint64_t p2_hi = p2 >> 32u; + + std::uint64_t Q = p0_hi + p1_lo + p2_lo; + + // The full product might now be computed as + // + // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) + // p_lo = p0_lo + (Q << 32) + // + // But in this particular case here, the full p_lo is not required. + // Effectively we only need to add the highest bit in p_lo to p_hi (and + // Q_hi + 1 does not overflow). + + Q += std::uint64_t{1} << (64u - 32u - 1u); // round, ties up + + const std::uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32u); + + return {h, x.e + y.e + 64}; + } + + /*! 
+ @brief normalize x such that the significand is >= 2^(q-1) + @pre x.f != 0 + */ + static diyfp normalize(diyfp x) noexcept { + + while ((x.f >> 63u) == 0) { + x.f <<= 1u; + x.e--; + } + + return x; + } + + /*! + @brief normalize x such that the result has the exponent E + @pre e >= x.e and the upper e - x.e bits of x.f must be zero. + */ + static diyfp normalize_to(const diyfp &x, + const int target_exponent) noexcept { + const int delta = x.e - target_exponent; + + return {x.f << delta, target_exponent}; + } +}; + +struct boundaries { + diyfp w; + diyfp minus; + diyfp plus; +}; + +/*! +Compute the (normalized) diyfp representing the input number 'value' and its +boundaries. +@pre value must be finite and positive +*/ +template boundaries compute_boundaries(FloatType value) { + + // Convert the IEEE representation into a diyfp. + // + // If v is denormal: + // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) + // If v is normalized: + // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) + + static_assert(std::numeric_limits::is_iec559, + "internal error: dtoa_short requires an IEEE-754 " + "floating-point implementation"); + + constexpr int kPrecision = + std::numeric_limits::digits; // = p (includes the hidden bit) + constexpr int kBias = + std::numeric_limits::max_exponent - 1 + (kPrecision - 1); + constexpr int kMinExp = 1 - kBias; + constexpr std::uint64_t kHiddenBit = std::uint64_t{1} + << (kPrecision - 1); // = 2^(p-1) + + using bits_type = typename std::conditional::type; + + const std::uint64_t bits = reinterpret_bits(value); + const std::uint64_t E = bits >> (kPrecision - 1); + const std::uint64_t F = bits & (kHiddenBit - 1); + + const bool is_denormal = E == 0; + const diyfp v = is_denormal + ? diyfp(F, kMinExp) + : diyfp(F + kHiddenBit, static_cast(E) - kBias); + + // Compute the boundaries m- and m+ of the floating-point value + // v = f * 2^e. + // + // Determine v- and v+, the floating-point predecessor and successor if v, + // respectively. + // + // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) + // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) + // + // v+ = v + 2^e + // + // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ + // between m- and m+ round to v, regardless of how the input rounding + // algorithm breaks ties. + // + // ---+-------------+-------------+-------------+-------------+--- (A) + // v- m- v m+ v+ + // + // -----------------+------+------+-------------+-------------+--- (B) + // v- m- v m+ v+ + + const bool lower_boundary_is_closer = F == 0 && E > 1; + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_minus = lower_boundary_is_closer + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) + + // Determine the normalized w+ = m+. + const diyfp w_plus = diyfp::normalize(m_plus); + + // Determine w- = m- such that e_(w-) = e_(w+). + const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); + + return {diyfp::normalize(v), w_minus, w_plus}; +} + +// Given normalized diyfp w, Grisu needs to find a (normalized) cached +// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies +// within a certain range [alpha, gamma] (Definition 3.2 from [1]) +// +// alpha <= e = e_c + e_w + q <= gamma +// +// or +// +// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q +// <= f_c * f_w * 2^gamma +// +// Since c and w are normalized, i.e. 
2^(q-1) <= f < 2^q, this implies +// +// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma +// +// or +// +// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) +// +// The choice of (alpha,gamma) determines the size of the table and the form of +// the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well +// in practice: +// +// The idea is to cut the number c * w = f * 2^e into two parts, which can be +// processed independently: An integral part p1, and a fractional part p2: +// +// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e +// = (f div 2^-e) + (f mod 2^-e) * 2^e +// = p1 + p2 * 2^e +// +// The conversion of p1 into decimal form requires a series of divisions and +// modulos by (a power of) 10. These operations are faster for 32-bit than for +// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be +// achieved by choosing +// +// -e >= 32 or e <= -32 := gamma +// +// In order to convert the fractional part +// +// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... +// +// into decimal form, the fraction is repeatedly multiplied by 10 and the digits +// d[-i] are extracted in order: +// +// (10 * p2) div 2^-e = d[-1] +// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... +// +// The multiplication by 10 must not overflow. It is sufficient to choose +// +// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. +// +// Since p2 = f mod 2^-e < 2^-e, +// +// -e <= 60 or e >= -60 := alpha + +constexpr int kAlpha = -60; +constexpr int kGamma = -32; + +struct cached_power // c = f * 2^e ~= 10^k +{ + std::uint64_t f; + int e; + int k; +}; + +/*! +For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached +power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c +satisfies (Definition 3.2 from [1]) + alpha <= e_c + e + q <= gamma. +*/ +inline cached_power get_cached_power_for_binary_exponent(int e) { + // Now + // + // alpha <= e_c + e + q <= gamma (1) + // ==> f_c * 2^alpha <= c * 2^e * 2^q + // + // and since the c's are normalized, 2^(q-1) <= f_c, + // + // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) + // ==> 2^(alpha - e - 1) <= c + // + // If c were an exact power of ten, i.e. c = 10^k, one may determine k as + // + // k = ceil( log_10( 2^(alpha - e - 1) ) ) + // = ceil( (alpha - e - 1) * log_10(2) ) + // + // From the paper: + // "In theory the result of the procedure could be wrong since c is rounded, + // and the computation itself is approximated [...]. In practice, however, + // this simple function is sufficient." + // + // For IEEE double precision floating-point numbers converted into + // normalized diyfp's w = f * 2^e, with q = 64, + // + // e >= -1022 (min IEEE exponent) + // -52 (p - 1) + // -52 (p - 1, possibly normalize denormal IEEE numbers) + // -11 (normalize the diyfp) + // = -1137 + // + // and + // + // e <= +1023 (max IEEE exponent) + // -52 (p - 1) + // -11 (normalize the diyfp) + // = 960 + // + // This binary exponent range [-1137,960] results in a decimal exponent + // range [-307,324]. One does not need to store a cached power for each + // k in this range. For each such k it suffices to find a cached power + // such that the exponent of the product lies in [alpha,gamma]. + // This implies that the difference of the decimal exponents of adjacent + // table entries must be less than or equal to + // + // floor( (gamma - alpha) * log_10(2) ) = 8. + // + // (A smaller distance gamma-alpha would require a larger table.) 
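// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored sources). The comment above
// derives k = ceil((alpha - e - 1) * log10(2)) with alpha = -60, and states
// that the binary exponent range [-1137, 960] maps to the decimal exponent
// range [-307, 324]. A quick stand-alone check of those two extremes:
#include <cmath>
#include <cstdio>

int main() {
  const int alpha = -60;
  auto k_for = [&](int e) {
    return static_cast<int>(std::ceil((alpha - e - 1) * std::log10(2.0)));
  };
  std::printf("k(e = -1137) = %d\n", k_for(-1137)); // 324
  std::printf("k(e =   960) = %d\n", k_for(960));   // -307
}
// ---------------------------------------------------------------------------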
+ + // NB: + // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. + + constexpr int kCachedPowersMinDecExp = -300; + constexpr int kCachedPowersDecStep = 8; + + static constexpr std::array kCachedPowers = {{ + {0xAB70FE17C79AC6CA, -1060, -300}, {0xFF77B1FCBEBCDC4F, -1034, -292}, + {0xBE5691EF416BD60C, -1007, -284}, {0x8DD01FAD907FFC3C, -980, -276}, + {0xD3515C2831559A83, -954, -268}, {0x9D71AC8FADA6C9B5, -927, -260}, + {0xEA9C227723EE8BCB, -901, -252}, {0xAECC49914078536D, -874, -244}, + {0x823C12795DB6CE57, -847, -236}, {0xC21094364DFB5637, -821, -228}, + {0x9096EA6F3848984F, -794, -220}, {0xD77485CB25823AC7, -768, -212}, + {0xA086CFCD97BF97F4, -741, -204}, {0xEF340A98172AACE5, -715, -196}, + {0xB23867FB2A35B28E, -688, -188}, {0x84C8D4DFD2C63F3B, -661, -180}, + {0xC5DD44271AD3CDBA, -635, -172}, {0x936B9FCEBB25C996, -608, -164}, + {0xDBAC6C247D62A584, -582, -156}, {0xA3AB66580D5FDAF6, -555, -148}, + {0xF3E2F893DEC3F126, -529, -140}, {0xB5B5ADA8AAFF80B8, -502, -132}, + {0x87625F056C7C4A8B, -475, -124}, {0xC9BCFF6034C13053, -449, -116}, + {0x964E858C91BA2655, -422, -108}, {0xDFF9772470297EBD, -396, -100}, + {0xA6DFBD9FB8E5B88F, -369, -92}, {0xF8A95FCF88747D94, -343, -84}, + {0xB94470938FA89BCF, -316, -76}, {0x8A08F0F8BF0F156B, -289, -68}, + {0xCDB02555653131B6, -263, -60}, {0x993FE2C6D07B7FAC, -236, -52}, + {0xE45C10C42A2B3B06, -210, -44}, {0xAA242499697392D3, -183, -36}, + {0xFD87B5F28300CA0E, -157, -28}, {0xBCE5086492111AEB, -130, -20}, + {0x8CBCCC096F5088CC, -103, -12}, {0xD1B71758E219652C, -77, -4}, + {0x9C40000000000000, -50, 4}, {0xE8D4A51000000000, -24, 12}, + {0xAD78EBC5AC620000, 3, 20}, {0x813F3978F8940984, 30, 28}, + {0xC097CE7BC90715B3, 56, 36}, {0x8F7E32CE7BEA5C70, 83, 44}, + {0xD5D238A4ABE98068, 109, 52}, {0x9F4F2726179A2245, 136, 60}, + {0xED63A231D4C4FB27, 162, 68}, {0xB0DE65388CC8ADA8, 189, 76}, + {0x83C7088E1AAB65DB, 216, 84}, {0xC45D1DF942711D9A, 242, 92}, + {0x924D692CA61BE758, 269, 100}, {0xDA01EE641A708DEA, 295, 108}, + {0xA26DA3999AEF774A, 322, 116}, {0xF209787BB47D6B85, 348, 124}, + {0xB454E4A179DD1877, 375, 132}, {0x865B86925B9BC5C2, 402, 140}, + {0xC83553C5C8965D3D, 428, 148}, {0x952AB45CFA97A0B3, 455, 156}, + {0xDE469FBD99A05FE3, 481, 164}, {0xA59BC234DB398C25, 508, 172}, + {0xF6C69A72A3989F5C, 534, 180}, {0xB7DCBF5354E9BECE, 561, 188}, + {0x88FCF317F22241E2, 588, 196}, {0xCC20CE9BD35C78A5, 614, 204}, + {0x98165AF37B2153DF, 641, 212}, {0xE2A0B5DC971F303A, 667, 220}, + {0xA8D9D1535CE3B396, 694, 228}, {0xFB9B7CD9A4A7443C, 720, 236}, + {0xBB764C4CA7A44410, 747, 244}, {0x8BAB8EEFB6409C1A, 774, 252}, + {0xD01FEF10A657842C, 800, 260}, {0x9B10A4E5E9913129, 827, 268}, + {0xE7109BFBA19C0C9D, 853, 276}, {0xAC2820D9623BF429, 880, 284}, + {0x80444B5E7AA7CF85, 907, 292}, {0xBF21E44003ACDD2D, 933, 300}, + {0x8E679C2F5E44FF8F, 960, 308}, {0xD433179D9C8CB841, 986, 316}, + {0x9E19DB92B4E31BA9, 1013, 324}, + }}; + + // This computation gives exactly the same results for k as + // k = ceil((kAlpha - e - 1) * 0.30102999566398114) + // for |e| <= 1500, but doesn't require floating-point operations. + // NB: log_10(2) ~= 78913 / 2^18 + const int f = kAlpha - e - 1; + const int k = (f * 78913) / (1 << 18) + static_cast(f > 0); + + const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / + kCachedPowersDecStep; + + const cached_power cached = kCachedPowers[static_cast(index)]; + + return cached; +} + +/*! +For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. +For n == 0, returns 1 and sets pow10 := 1. 
+*/ +inline int find_largest_pow10(const std::uint32_t n, std::uint32_t &pow10) { + // LCOV_EXCL_START + if (n >= 1000000000) { + pow10 = 1000000000; + return 10; + } + // LCOV_EXCL_STOP + else if (n >= 100000000) { + pow10 = 100000000; + return 9; + } else if (n >= 10000000) { + pow10 = 10000000; + return 8; + } else if (n >= 1000000) { + pow10 = 1000000; + return 7; + } else if (n >= 100000) { + pow10 = 100000; + return 6; + } else if (n >= 10000) { + pow10 = 10000; + return 5; + } else if (n >= 1000) { + pow10 = 1000; + return 4; + } else if (n >= 100) { + pow10 = 100; + return 3; + } else if (n >= 10) { + pow10 = 10; + return 2; + } else { + pow10 = 1; + return 1; + } +} + +inline void grisu2_round(char *buf, int len, std::uint64_t dist, + std::uint64_t delta, std::uint64_t rest, + std::uint64_t ten_k) { + + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // ten_k + // <------> + // <---- rest ----> + // --------------[------------------+----+--------------]-------------- + // w V + // = buf * 10^k + // + // ten_k represents a unit-in-the-last-place in the decimal representation + // stored in buf. + // Decrement buf by ten_k while this takes buf closer to w. + + // The tests are written in this order to avoid overflow in unsigned + // integer arithmetic. + + while (rest < dist && delta - rest >= ten_k && + (rest + ten_k < dist || dist - rest > rest + ten_k - dist)) { + buf[len - 1]--; + rest += ten_k; + } +} + +/*! +Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. +M- and M+ must be normalized and share the same exponent -60 <= e <= -32. +*/ +inline void grisu2_digit_gen(char *buffer, int &length, int &decimal_exponent, + diyfp M_minus, diyfp w, diyfp M_plus) { + static_assert(kAlpha >= -60, "internal error"); + static_assert(kGamma <= -32, "internal error"); + + // Generates the digits (and the exponent) of a decimal floating-point + // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's + // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= + // gamma. + // + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // Grisu2 generates the digits of M+ from left to right and stops as soon as + // V is in [M-,M+]. + + std::uint64_t delta = + diyfp::sub(M_plus, M_minus) + .f; // (significand of (M+ - M-), implicit exponent is e) + std::uint64_t dist = + diyfp::sub(M_plus, w) + .f; // (significand of (M+ - w ), implicit exponent is e) + + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e + + const diyfp one(std::uint64_t{1} << -M_plus.e, M_plus.e); + + auto p1 = static_cast( + M_plus.f >> + -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) 
+ std::uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e + + // 1) + // + // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] + + std::uint32_t pow10; + const int k = find_largest_pow10(p1, pow10); + + // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) + // + // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) + // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) + // + // M+ = p1 + p2 * 2^e + // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e + // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e + // = d[k-1] * 10^(k-1) + ( rest) * 2^e + // + // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) + // + // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] + // + // but stop as soon as + // + // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e + + int n = k; + while (n > 0) { + // Invariants: + // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) + // pow10 = 10^(n-1) <= p1 < 10^n + // + const std::uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) + const std::uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) + // + // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e + // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) + // + p1 = r; + n--; + // + // M+ = buffer * 10^n + (p1 + p2 * 2^e) + // pow10 = 10^n + // + + // Now check if enough digits have been generated. + // Compute + // + // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e + // + // Note: + // Since rest and delta share the same exponent e, it suffices to + // compare the significands. + const std::uint64_t rest = (std::uint64_t{p1} << -one.e) + p2; + if (rest <= delta) { + // V = buffer * 10^n, with M- <= V <= M+. + + decimal_exponent += n; + + // We may now just stop. But instead look if the buffer could be + // decremented to bring V closer to w. + // + // pow10 = 10^n is now 1 ulp in the decimal representation V. + // The rounding procedure works with diyfp's with an implicit + // exponent of e. + // + // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e + // + const std::uint64_t ten_n = std::uint64_t{pow10} << -one.e; + grisu2_round(buffer, length, dist, delta, rest, ten_n); + + return; + } + + pow10 /= 10; + // + // pow10 = 10^(n-1) <= p1 < 10^n + // Invariants restored. + } + + // 2) + // + // The digits of the integral part have been generated: + // + // M+ = d[k-1]...d[1]d[0] + p2 * 2^e + // = buffer + p2 * 2^e + // + // Now generate the digits of the fractional part p2 * 2^e. + // + // Note: + // No decimal point is generated: the exponent is adjusted instead. + // + // p2 actually represents the fraction + // + // p2 * 2^e + // = p2 / 2^-e + // = d[-1] / 10^1 + d[-2] / 10^2 + ... + // + // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) + // + // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m + // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) + // + // using + // + // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) + // = ( d) * 2^-e + ( r) + // + // or + // 10^m * p2 * 2^e = d + r * 2^e + // + // i.e. + // + // M+ = buffer + p2 * 2^e + // = buffer + 10^-m * (d + r * 2^e) + // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e + // + // and stop as soon as 10^-m * r * 2^e <= delta * 2^e + + int m = 0; + for (;;) { + // Invariant: + // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) 
+ // * 2^e + // = buffer * 10^-m + 10^-m * (p2 ) + // * 2^e = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e = + // buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + + // (10*p2 mod 2^-e)) * 2^e + // + p2 *= 10; + const std::uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e + const std::uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e + // + // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) + // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + buffer[length++] = static_cast('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + p2 = r; + m++; + // + // M+ = buffer * 10^-m + 10^-m * p2 * 2^e + // Invariant restored. + + // Check if enough digits have been generated. + // + // 10^-m * p2 * 2^e <= delta * 2^e + // p2 * 2^e <= 10^m * delta * 2^e + // p2 <= 10^m * delta + delta *= 10; + dist *= 10; + if (p2 <= delta) { + break; + } + } + + // V = buffer * 10^-m, with M- <= V <= M+. + + decimal_exponent -= m; + + // 1 ulp in the decimal representation is now 10^-m. + // Since delta and dist are now scaled by 10^m, we need to do the + // same with ulp in order to keep the units in sync. + // + // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e + // + const std::uint64_t ten_m = one.f; + grisu2_round(buffer, length, dist, delta, p2, ten_m); + + // By construction this algorithm generates the shortest possible decimal + // number (Loitsch, Theorem 6.2) which rounds back to w. + // For an input number of precision p, at least + // + // N = 1 + ceil(p * log_10(2)) + // + // decimal digits are sufficient to identify all binary floating-point + // numbers (Matula, "In-and-Out conversions"). + // This implies that the algorithm does not produce more than N decimal + // digits. + // + // N = 17 for p = 53 (IEEE double precision) + // N = 9 for p = 24 (IEEE single precision) +} + +/*! +v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +inline void grisu2(char *buf, int &len, int &decimal_exponent, diyfp m_minus, + diyfp v, diyfp m_plus) { + + // --------(-----------------------+-----------------------)-------- (A) + // m- v m+ + // + // --------------------(-----------+-----------------------)-------- (B) + // m- v m+ + // + // First scale v (and m- and m+) such that the exponent is in the range + // [alpha, gamma]. + + const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); + + const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k + + // The exponent of the products is = v.e + c_minus_k.e + q and is in the range + // [alpha,gamma] + const diyfp w = diyfp::mul(v, c_minus_k); + const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); + const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); + + // ----(---+---)---------------(---+---)---------------(---+---)---- + // w- w w+ + // = c*m- = c*v = c*m+ + // + // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and + // w+ are now off by a small amount. + // In fact: + // + // w - v * 10^k < 1 ulp + // + // To account for this inaccuracy, add resp. subtract 1 ulp. + // + // --------+---[---------------(---+---)---------------]---+-------- + // w- M- w M+ w+ + // + // Now any number in [M-, M+] (bounds included) will round to w when input, + // regardless of how the input rounding algorithm breaks ties. 
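// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored sources). The note at the
// end of grisu2_digit_gen above states that N = 1 + ceil(p * log10(2))
// decimal digits suffice to identify every binary floating-point number with
// p bits of precision: N = 17 for double (p = 53) and N = 9 for float
// (p = 24). A quick check of those two quoted values:
#include <cmath>
#include <cstdio>

int main() {
  auto digits_needed = [](int p) {
    return 1 + static_cast<int>(std::ceil(p * std::log10(2.0)));
  };
  std::printf("p = 53 -> N = %d\n", digits_needed(53)); // 17
  std::printf("p = 24 -> N = %d\n", digits_needed(24)); // 9
}
// ---------------------------------------------------------------------------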
+ // + // And digit_gen generates the shortest possible such number in [M-, M+]. + // Note that this does not mean that Grisu2 always generates the shortest + // possible number in the interval (m-, m+). + const diyfp M_minus(w_minus.f + 1, w_minus.e); + const diyfp M_plus(w_plus.f - 1, w_plus.e); + + decimal_exponent = -cached.k; // = -(-k) = k + + grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); +} + +/*! +v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +template +void grisu2(char *buf, int &len, int &decimal_exponent, FloatType value) { + static_assert(diyfp::kPrecision >= std::numeric_limits::digits + 3, + "internal error: not enough precision"); + + // If the neighbors (and boundaries) of 'value' are always computed for + // double-precision numbers, all float's can be recovered using strtod (and + // strtof). However, the resulting decimal representations are not exactly + // "short". + // + // The documentation for 'std::to_chars' + // (https://en.cppreference.com/w/cpp/utility/to_chars) says "value is + // converted to a string as if by std::sprintf in the default ("C") locale" + // and since sprintf promotes float's to double's, I think this is exactly + // what 'std::to_chars' does. On the other hand, the documentation for + // 'std::to_chars' requires that "parsing the representation using the + // corresponding std::from_chars function recovers value exactly". That + // indicates that single precision floating-point numbers should be recovered + // using 'std::strtof'. + // + // NB: If the neighbors are computed for single-precision numbers, there is a + // single float + // (7.0385307e-26f) which can't be recovered using strtod. The resulting + // double precision value is off by 1 ulp. +#if 0 + const boundaries w = compute_boundaries(static_cast(value)); +#else + const boundaries w = compute_boundaries(value); +#endif + + grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); +} + +/*! +@brief appends a decimal representation of e to buf +@return a pointer to the element following the exponent. +@pre -1000 < e < 1000 +*/ +inline char *append_exponent(char *buf, int e) { + + if (e < 0) { + e = -e; + *buf++ = '-'; + } else { + *buf++ = '+'; + } + + auto k = static_cast(e); + if (k < 10) { + // Always print at least two digits in the exponent. + // This is for compatibility with printf("%g"). + *buf++ = '0'; + *buf++ = static_cast('0' + k); + } else if (k < 100) { + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } else { + *buf++ = static_cast('0' + k / 100); + k %= 100; + *buf++ = static_cast('0' + k / 10); + k %= 10; + *buf++ = static_cast('0' + k); + } + + return buf; +} + +/*! +@brief prettify v = buf * 10^decimal_exponent +If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point +notation. Otherwise it will be printed in exponential notation. +@pre min_exp < 0 +@pre max_exp > 0 +*/ +inline char *format_buffer(char *buf, int len, int decimal_exponent, + int min_exp, int max_exp) { + + const int k = len; + const int n = len + decimal_exponent; + + // v = buf * 10^(n-k) + // k is the length of the buffer (number of decimal digits) + // n is the position of the decimal point relative to the start of the buffer. 
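// ---------------------------------------------------------------------------
// Editor's illustration (not part of the vendored sources). format_buffer,
// continued just below, chooses an output shape from n = len +
// decimal_exponent. This stand-alone classifier mirrors those branches, using
// the min_exp/max_exp values that to_chars passes further down (kMinExp = -4
// and, presumably, std::numeric_limits<double>::digits10 for kMaxExp, since
// the template argument is elided in the text above).
#include <cstdio>
#include <limits>

const char *shape(int len, int decimal_exponent) {
  const int min_exp = -4;
  const int max_exp = std::numeric_limits<double>::digits10; // 15
  const int k = len;                     // number of digits in the buffer
  const int n = len + decimal_exponent;  // position of the decimal point
  if (k <= n && n <= max_exp)  return "digits[000].0";
  if (0 < n && n <= max_exp)   return "dig.its";
  if (min_exp < n && n <= 0)   return "0.[000]digits";
  return "d.igitsE+xx (exponential)";
}

int main() {
  std::printf("%s\n", shape(2, 1));   // 42  * 10^1  -> "420.0" style
  std::printf("%s\n", shape(3, -2));  // 314 * 10^-2 -> "3.14" style
  std::printf("%s\n", shape(3, -5));  // 314 * 10^-5 -> "0.00314" style
  std::printf("%s\n", shape(3, 20));  // 314 * 10^20 -> exponential
}
// ---------------------------------------------------------------------------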
+ + if (k <= n && n <= max_exp) { + // digits[000] + // len <= max_exp + 2 + + std::memset(buf + k, '0', static_cast(n) - static_cast(k)); + // Make it look like a floating-point number (#362, #378) + buf[n + 0] = '.'; + buf[n + 1] = '0'; + return buf + (static_cast(n)) + 2; + } + + if (0 < n && n <= max_exp) { + // dig.its + // len <= max_digits10 + 1 + std::memmove(buf + (static_cast(n) + 1), buf + n, + static_cast(k) - static_cast(n)); + buf[n] = '.'; + return buf + (static_cast(k) + 1U); + } + + if (min_exp < n && n <= 0) { + // 0.[000]digits + // len <= 2 + (-min_exp - 1) + max_digits10 + + std::memmove(buf + (2 + static_cast(-n)), buf, + static_cast(k)); + buf[0] = '0'; + buf[1] = '.'; + std::memset(buf + 2, '0', static_cast(-n)); + return buf + (2U + static_cast(-n) + static_cast(k)); + } + + if (k == 1) { + // dE+123 + // len <= 1 + 5 + + buf += 1; + } else { + // d.igitsE+123 + // len <= max_digits10 + 1 + 5 + + std::memmove(buf + 2, buf + 1, static_cast(k) - 1); + buf[1] = '.'; + buf += 1 + static_cast(k); + } + + *buf++ = 'e'; + return append_exponent(buf, n - 1); +} + +} // namespace dtoa_impl + +/*! +The format of the resulting decimal representation is similar to printf's %g +format. Returns an iterator pointing past-the-end of the decimal representation. +@note The input number must be finite, i.e. NaN's and Inf's are not supported. +@note The buffer must be large enough. +@note The result is NOT null-terminated. +*/ +char *to_chars(char *first, const char *last, double value) { + static_cast(last); // maybe unused - fix warning + bool negative = std::signbit(value); + if (negative) { + value = -value; + *first++ = '-'; + } + + if (value == 0) // +-0 + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) + *first++ = '.'; + *first++ = '0'; + return first; + } + // Compute v = buffer * 10^decimal_exponent. + // The decimal digits are stored in the buffer, which needs to be interpreted + // as an unsigned decimal integer. + // len is the length of the buffer, i.e. the number of decimal digits. + int len = 0; + int decimal_exponent = 0; + dtoa_impl::grisu2(first, len, decimal_exponent, value); + // Format the buffer like printf("%.*g", prec, value) + constexpr int kMinExp = -4; + constexpr int kMaxExp = std::numeric_limits::digits10; + + return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, + kMaxExp); +} +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_TO_CHARS_CPP +/* end file to_chars.cpp */ +/* including from_chars.cpp: #include */ +/* begin file from_chars.cpp */ +#ifndef SIMDJSON_SRC_FROM_CHARS_CPP +#define SIMDJSON_SRC_FROM_CHARS_CPP + +/* skipped duplicate #include */ + +#include +#include +#include + +namespace simdjson { +namespace internal { + +/** + * The code in the internal::from_chars function is meant to handle the floating-point number parsing + * when we have more than 19 digits in the decimal mantissa. This should only be seen + * in adversarial scenarios: we do not expect production systems to even produce + * such floating-point numbers. + * + * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) + * who credits Ken Thompson for the design (via a reference to the Go source + * code). 
See + * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c + * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c + * It is probably not very fast but it is a fallback that should almost never be + * called in real life. Google Wuffs is published under APL 2.0. + **/ + +namespace { +constexpr uint32_t max_digits = 768; +constexpr int32_t decimal_point_range = 2047; +} // namespace + +struct adjusted_mantissa { + uint64_t mantissa; + int power2; + adjusted_mantissa() : mantissa(0), power2(0) {} +}; + +struct decimal { + uint32_t num_digits; + int32_t decimal_point; + bool negative; + bool truncated; + uint8_t digits[max_digits]; +}; + +template struct binary_format { + static constexpr int mantissa_explicit_bits(); + static constexpr int minimum_exponent(); + static constexpr int infinite_power(); + static constexpr int sign_index(); +}; + +template <> constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} + +template <> constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> constexpr int binary_format::infinite_power() { + return 0x7FF; +} + +template <> constexpr int binary_format::sign_index() { return 63; } + +bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } + +// This should always succeed since it follows a call to parse_number. +decimal parse_decimal(const char *&p) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while (*p == '0') { + ++p; + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if (*p == '.') { + ++p; + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while (is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if (('e' == *p) || ('E' == *p)) { + ++p; + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while (is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? -exp_number : exp_number); + } + return answer; +} + +// This should always succeed since it follows a call to parse_number. +// Will not read at or beyond the "end" pointer. 
+decimal parse_decimal(const char *&p, const char * end) noexcept { + decimal answer; + answer.num_digits = 0; + answer.decimal_point = 0; + answer.truncated = false; + if(p == end) { return answer; } // should never happen + answer.negative = (*p == '-'); + if ((*p == '-') || (*p == '+')) { + ++p; + } + + while ((p != end) && (*p == '0')) { + ++p; + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + if ((p != end) && (*p == '.')) { + ++p; + if(p == end) { return answer; } // should never happen + const char *first_after_period = p; + // if we have not yet encountered a zero, we have to skip it as well + if (answer.num_digits == 0) { + // skip zeros + while (*p == '0') { + ++p; + } + } + while ((p != end) && is_integer(*p)) { + if (answer.num_digits < max_digits) { + answer.digits[answer.num_digits] = uint8_t(*p - '0'); + } + answer.num_digits++; + ++p; + } + answer.decimal_point = int32_t(first_after_period - p); + } + if(answer.num_digits > 0) { + const char *preverse = p - 1; + int32_t trailing_zeros = 0; + while ((*preverse == '0') || (*preverse == '.')) { + if(*preverse == '0') { trailing_zeros++; }; + --preverse; + } + answer.decimal_point += int32_t(answer.num_digits); + answer.num_digits -= uint32_t(trailing_zeros); + } + if(answer.num_digits > max_digits ) { + answer.num_digits = max_digits; + answer.truncated = true; + } + if ((p != end) && (('e' == *p) || ('E' == *p))) { + ++p; + if(p == end) { return answer; } // should never happen + bool neg_exp = false; + if ('-' == *p) { + neg_exp = true; + ++p; + } else if ('+' == *p) { + ++p; + } + int32_t exp_number = 0; // exponential part + while ((p != end) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - '0'); + if (exp_number < 0x10000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + answer.decimal_point += (neg_exp ? 
-exp_number : exp_number); + } + return answer; +} + +namespace { + +// remove all final zeroes +inline void trim(decimal &h) { + while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { + h.num_digits--; + } +} + +uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { + shift &= 63; + const static uint16_t number_of_digits_decimal_left_shift_table[65] = { + 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, + 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, + 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, + 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, + 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, + 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, + 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, + 0x051C, 0x051C, + }; + uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; + uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; + uint32_t num_new_digits = x_a >> 11; + uint32_t pow5_a = 0x7FF & x_a; + uint32_t pow5_b = 0x7FF & x_b; + const static uint8_t + number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { + 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, + 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, + 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, + 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, + 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, + 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, + 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, + 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, + 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, + 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, + 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, + 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, + 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, + 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, + 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, + 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, + 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, + 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, + 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, + 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, + 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, + 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, + 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, + 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, + 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, + 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, + 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, + 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, + 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, + 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, + 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, + 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, + 9, 7, 0, 7, 0, 3, 
1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, + 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, + 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, + 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, + 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, + 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, + 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, + 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, + 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, + 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, + 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, + 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, + 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, + 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, + 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, + 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, + 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, + 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, + 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, + 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, + 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, + 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, + 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, + 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, + 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, + }; + const uint8_t *pow5 = + &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; + uint32_t i = 0; + uint32_t n = pow5_b - pow5_a; + for (; i < n; i++) { + if (i >= h.num_digits) { + return num_new_digits - 1; + } else if (h.digits[i] == pow5[i]) { + continue; + } else if (h.digits[i] < pow5[i]) { + return num_new_digits - 1; + } else { + return num_new_digits; + } + } + return num_new_digits; +} + +} // end of anonymous namespace + +uint64_t round(decimal &h) { + if ((h.num_digits == 0) || (h.decimal_point < 0)) { + return 0; + } else if (h.decimal_point > 18) { + return UINT64_MAX; + } + // at this point, we know that h.decimal_point >= 0 + uint32_t dp = uint32_t(h.decimal_point); + uint64_t n = 0; + for (uint32_t i = 0; i < dp; i++) { + n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); + } + bool round_up = false; + if (dp < h.num_digits) { + round_up = h.digits[dp] >= 5; // normally, we round up + // but we may need to round to even! 
+ if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { + round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); + } + } + if (round_up) { + n++; + } + return n; +} + +// computes h * 2^-shift +void decimal_left_shift(decimal &h, uint32_t shift) { + if (h.num_digits == 0) { + return; + } + uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); + int32_t read_index = int32_t(h.num_digits - 1); + uint32_t write_index = h.num_digits - 1 + num_new_digits; + uint64_t n = 0; + + while (read_index >= 0) { + n += uint64_t(h.digits[read_index]) << shift; + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + read_index--; + } + while (n > 0) { + uint64_t quotient = n / 10; + uint64_t remainder = n - (10 * quotient); + if (write_index < max_digits) { + h.digits[write_index] = uint8_t(remainder); + } else if (remainder > 0) { + h.truncated = true; + } + n = quotient; + write_index--; + } + h.num_digits += num_new_digits; + if (h.num_digits > max_digits) { + h.num_digits = max_digits; + } + h.decimal_point += int32_t(num_new_digits); + trim(h); +} + +// computes h * 2^shift +void decimal_right_shift(decimal &h, uint32_t shift) { + uint32_t read_index = 0; + uint32_t write_index = 0; + + uint64_t n = 0; + + while ((n >> shift) == 0) { + if (read_index < h.num_digits) { + n = (10 * n) + h.digits[read_index++]; + } else if (n == 0) { + return; + } else { + while ((n >> shift) == 0) { + n = 10 * n; + read_index++; + } + break; + } + } + h.decimal_point -= int32_t(read_index - 1); + if (h.decimal_point < -decimal_point_range) { // it is zero + h.num_digits = 0; + h.decimal_point = 0; + h.negative = false; + h.truncated = false; + return; + } + uint64_t mask = (uint64_t(1) << shift) - 1; + while (read_index < h.num_digits) { + uint8_t new_digit = uint8_t(n >> shift); + n = (10 * (n & mask)) + h.digits[read_index++]; + h.digits[write_index++] = new_digit; + } + while (n > 0) { + uint8_t new_digit = uint8_t(n >> shift); + n = 10 * (n & mask); + if (write_index < max_digits) { + h.digits[write_index++] = new_digit; + } else if (new_digit > 0) { + h.truncated = true; + } + } + h.num_digits = write_index; + trim(h); +} + +template adjusted_mantissa compute_float(decimal &d) { + adjusted_mantissa answer; + if (d.num_digits == 0) { + // should be zero + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } + // At this point, going further, we can assume that d.num_digits > 0. + // We want to guard against excessive decimal point values because + // they can result in long running times. Indeed, we do + // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 + // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not + // fine (runs for a long time). + // + if(d.decimal_point < -324) { + // We have something smaller than 1e-324 which is always zero + // in binary64 and binary32. + // It should be zero. + answer.power2 = 0; + answer.mantissa = 0; + return answer; + } else if(d.decimal_point >= 310) { + // We have something at least as large as 0.1e310 which is + // always infinite. 
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+
+  static const uint32_t max_shift = 60;
+  static const uint32_t num_powers = 19;
+  static const uint8_t powers[19] = {
+      0,  3,  6,  9,  13, 16, 19, 23, 26, 29, //
+      33, 36, 39, 43, 46, 49, 53, 56, 59,     //
+  };
+  int32_t exp2 = 0;
+  while (d.decimal_point > 0) {
+    uint32_t n = uint32_t(d.decimal_point);
+    uint32_t shift = (n < num_powers) ? powers[n] : max_shift;
+    decimal_right_shift(d, shift);
+    if (d.decimal_point < -decimal_point_range) {
+      // should be zero
+      answer.power2 = 0;
+      answer.mantissa = 0;
+      return answer;
+    }
+    exp2 += int32_t(shift);
+  }
+  // We shift left toward [1/2 ... 1].
+  while (d.decimal_point <= 0) {
+    uint32_t shift;
+    if (d.decimal_point == 0) {
+      if (d.digits[0] >= 5) {
+        break;
+      }
+      shift = (d.digits[0] < 2) ? 2 : 1;
+    } else {
+      uint32_t n = uint32_t(-d.decimal_point);
+      shift = (n < num_powers) ? powers[n] : max_shift;
+    }
+    decimal_left_shift(d, shift);
+    if (d.decimal_point > decimal_point_range) {
+      // we want to get infinity:
+      answer.power2 = 0xFF;
+      answer.mantissa = 0;
+      return answer;
+    }
+    exp2 -= int32_t(shift);
+  }
+  // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2].
+  exp2--;
+  constexpr int32_t minimum_exponent = binary::minimum_exponent();
+  while ((minimum_exponent + 1) > exp2) {
+    uint32_t n = uint32_t((minimum_exponent + 1) - exp2);
+    if (n > max_shift) {
+      n = max_shift;
+    }
+    decimal_right_shift(d, n);
+    exp2 += int32_t(n);
+  }
+  if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
+    answer.power2 = binary::infinite_power();
+    answer.mantissa = 0;
+    return answer;
+  }
+
+  const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1;
+  decimal_left_shift(d, mantissa_size_in_bits);
+
+  uint64_t mantissa = round(d);
+  // It is possible that we have an overflow, in which case we need
+  // to shift back.
+  if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) {
+    decimal_right_shift(d, 1);
+    exp2 += 1;
+    mantissa = round(d);
+    if ((exp2 - minimum_exponent) >= binary::infinite_power()) {
+      answer.power2 = binary::infinite_power();
+      answer.mantissa = 0;
+      return answer;
+    }
+  }
+  answer.power2 = exp2 - binary::minimum_exponent();
+  if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) {
+    answer.power2--;
+  }
+  answer.mantissa =
+      mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1);
+  return answer;
+}
+
+template <typename binary>
+adjusted_mantissa parse_long_mantissa(const char *first) {
+  decimal d = parse_decimal(first);
+  return compute_float<binary>(d);
+}
+
+template <typename binary>
+adjusted_mantissa parse_long_mantissa(const char *first, const char *end) {
+  decimal d = parse_decimal(first, end);
+  return compute_float<binary>(d);
+}
+
+double from_chars(const char *first) noexcept {
+  bool negative = first[0] == '-';
+  if (negative) {
+    first++;
+  }
+  adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first);
+  uint64_t word = am.mantissa;
+  word |= uint64_t(am.power2)
+          << binary_format<double>::mantissa_explicit_bits();
+  word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index())
+                  : word;
+  double value;
+  std::memcpy(&value, &word, sizeof(double));
+  return value;
+}
+
+
+double from_chars(const char *first, const char *end) noexcept {
+  bool negative = first[0] == '-';
+  if (negative) {
+    first++;
+  }
+  adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first, end);
+  uint64_t word = am.mantissa;
+  word |= uint64_t(am.power2)
+          << binary_format<double>::mantissa_explicit_bits();
+  word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index())
+                  : word;
+  double value;
+  std::memcpy(&value, &word, sizeof(double));
+  return value;
+}
+
+} // internal
+} // simdjson
+
+#endif // SIMDJSON_SRC_FROM_CHARS_CPP
+/* end file from_chars.cpp */
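+
+// Illustrative sketch (not part of the simdjson sources): a round trip through
+// the two internal helpers defined above. to_chars() writes a shortest,
+// printf("%g")-style decimal form and does not null-terminate; from_chars()
+// parses it back through the slow decimal fallback. The 64-byte buffer and the
+// function name round_trip are assumptions made for the example.
+#if 0
+#include <cstdio>
+
+static void round_trip(double value) {
+  char buf[64];
+  char *end = simdjson::internal::to_chars(buf, buf + sizeof(buf), value);
+  *end = '\0'; // to_chars leaves the result unterminated
+  double parsed = simdjson::internal::from_chars(buf);
+  std::printf("%s -> %.17g\n", buf, parsed);
+}
+#endif
+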
+/* including internal/error_tables.cpp: #include <internal/error_tables.cpp> */
+/* begin file internal/error_tables.cpp */
+#ifndef SIMDJSON_SRC_ERROR_TABLES_CPP
+#define SIMDJSON_SRC_ERROR_TABLES_CPP
+
+/* including simdjson/internal/jsoncharutils_tables.h: #include <simdjson/internal/jsoncharutils_tables.h> */
+/* begin file simdjson/internal/jsoncharutils_tables.h */
+#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H
+#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H
+
+/* skipped duplicate #include "simdjson/base.h" */
+
+#ifdef JSON_TEST_STRINGS
+void found_string(const uint8_t *buf, const uint8_t *parsed_begin,
+                  const uint8_t *parsed_end);
+void found_bad_string(const uint8_t *buf);
+#endif
+
+namespace simdjson {
+namespace internal {
+// structural chars here are
+// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL)
+// we are also interested in the four whitespace characters
+// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
+
+extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256];
+extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256];
+extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
+
+} // namespace internal
+} // namespace simdjson
+
+#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H
+/* end file simdjson/internal/jsoncharutils_tables.h */
+/* including simdjson/error-inl.h: #include <simdjson/error-inl.h> */
+/* begin file simdjson/error-inl.h */
+#ifndef SIMDJSON_ERROR_INL_H
+#define SIMDJSON_ERROR_INL_H
+
+/* skipped duplicate #include "simdjson/error.h" */
+
+#include <ostream>
+
+namespace simdjson {
+namespace internal {
+  // We store the error code so we can validate the error message is associated with the right code
+  struct error_code_info {
+    error_code code;
+    const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor)
+  };
+  // These MUST match the codes in error_code. We check this constraint in basictests.
+  extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[];
+} // namespace internal
+
+
+inline const char *error_message(error_code error) noexcept {
+  // If you're using error_code, we're trusting you got it from the enum.
+  return internal::error_codes[int(error)].message;
+}
+
+// deprecated function
+#ifndef SIMDJSON_DISABLE_DEPRECATED_API
+inline const std::string error_message(int error) noexcept {
+  if (error < 0 || error >= error_code::NUM_ERROR_CODES) {
+    return internal::error_codes[UNEXPECTED_ERROR].message;
+  }
+  return internal::error_codes[error].message;
+}
+#endif // SIMDJSON_DISABLE_DEPRECATED_API
+
+inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept {
+  return out << error_message(error);
+}
+
+namespace internal {
+
+//
+// internal::simdjson_result_base<T> inline implementation
+//
+
+template<typename T>
+simdjson_inline void simdjson_result_base<T>::tie(T &value, error_code &error) && noexcept {
+  error = this->second;
+  if (!error) {
+    value = std::forward<simdjson_result_base<T>>(*this).first;
+  }
+}
+
+template<typename T>
+simdjson_warn_unused simdjson_inline error_code simdjson_result_base<T>::get(T &value) && noexcept {
+  error_code error;
+  std::forward<simdjson_result_base<T>>(*this).tie(value, error);
+  return error;
+}
+
+template<typename T>
+simdjson_inline error_code simdjson_result_base<T>::error() const noexcept {
+  return this->second;
+}
+
+#if SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline T& simdjson_result_base<T>::value() & noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result_base<T>::value() && noexcept(false) {
+  return std::forward<simdjson_result_base<T>>(*this).take_value();
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result_base<T>::take_value() && noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline simdjson_result_base<T>::operator T&&() && noexcept(false) {
+  return std::forward<simdjson_result_base<T>>(*this).take_value();
+}
+
+#endif // SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline const T& simdjson_result_base<T>::value_unsafe() const& noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result_base<T>::value_unsafe() && noexcept {
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline simdjson_result_base<T>::simdjson_result_base(T &&value, error_code error) noexcept
+    : std::pair<T, error_code>(std::forward<T>(value), error) {}
+template<typename T>
+simdjson_inline simdjson_result_base<T>::simdjson_result_base(error_code error) noexcept
+    : simdjson_result_base(T{}, error) {}
+template<typename T>
+simdjson_inline simdjson_result_base<T>::simdjson_result_base(T &&value) noexcept
+    : simdjson_result_base(std::forward<T>(value), SUCCESS) {}
+template<typename T>
+simdjson_inline simdjson_result_base<T>::simdjson_result_base() noexcept
+    : simdjson_result_base(T{}, UNINITIALIZED) {}
+
+} // namespace internal
+
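+// Illustrative sketch (not part of the simdjson sources): the non-throwing way
+// to unpack a simdjson_result<T> via the tie() helper implemented in this file.
+// simdjson_result<double> is only an example payload type, the function name
+// report is hypothetical, and the fragment assumes <utility> and <cstdio> are
+// available where it is compiled.
+#if 0
+void report(simdjson::simdjson_result<double> res) {
+  double value;
+  simdjson::error_code error;
+  std::move(res).tie(value, error);            // writes value only on SUCCESS
+  if (error) {
+    std::puts(simdjson::error_message(error)); // same strings as error_codes[]
+  } else {
+    std::printf("value: %f\n", value);
+  }
+}
+#endif
+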
+///
+/// simdjson_result<T> inline implementation
+///
+
+template<typename T>
+simdjson_inline void simdjson_result<T>::tie(T &value, error_code &error) && noexcept {
+  std::forward<internal::simdjson_result_base<T>>(*this).tie(value, error);
+}
+
+template<typename T>
+simdjson_warn_unused simdjson_inline error_code simdjson_result<T>::get(T &value) && noexcept {
+  return std::forward<internal::simdjson_result_base<T>>(*this).get(value);
+}
+
+template<typename T>
+simdjson_inline error_code simdjson_result<T>::error() const noexcept {
+  return internal::simdjson_result_base<T>::error();
+}
+
+#if SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline T& simdjson_result<T>::value() & noexcept(false) {
+  return internal::simdjson_result_base<T>::value();
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result<T>::value() && noexcept(false) {
+  return std::forward<internal::simdjson_result_base<T>>(*this).value();
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result<T>::take_value() && noexcept(false) {
+  return std::forward<internal::simdjson_result_base<T>>(*this).take_value();
+}
+
+template<typename T>
+simdjson_inline simdjson_result<T>::operator T&&() && noexcept(false) {
+  return std::forward<internal::simdjson_result_base<T>>(*this).take_value();
+}
+
+#endif // SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline const T& simdjson_result<T>::value_unsafe() const& noexcept {
+  return internal::simdjson_result_base<T>::value_unsafe();
+}
+
+template<typename T>
+simdjson_inline T&& simdjson_result<T>::value_unsafe() && noexcept {
+  return std::forward<internal::simdjson_result_base<T>>(*this).value_unsafe();
+}
+
+template<typename T>
+simdjson_inline simdjson_result<T>::simdjson_result(T &&value, error_code error) noexcept
+    : internal::simdjson_result_base<T>(std::forward<T>(value), error) {}
+template<typename T>
+simdjson_inline simdjson_result<T>::simdjson_result(error_code error) noexcept
+    : internal::simdjson_result_base<T>(error) {}
+template<typename T>
+simdjson_inline simdjson_result<T>::simdjson_result(T &&value) noexcept
+    : internal::simdjson_result_base<T>(std::forward<T>(value)) {}
+template<typename T>
+simdjson_inline simdjson_result<T>::simdjson_result() noexcept
+    : internal::simdjson_result_base<T>() {}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_ERROR_INL_H
+/* end file simdjson/error-inl.h */
+
+namespace simdjson {
+namespace internal {
+
+  SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[] {
+    { SUCCESS, "SUCCESS: No error" },
+    { CAPACITY, "CAPACITY: This parser can't support a document that big" },
+    { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
+    { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
+    { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
+    { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
+    { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
+    { F_ATOM_ERROR, "F_ATOM_ERROR: Problem while parsing an atom starting with the letter 'f'" },
+    { N_ATOM_ERROR, "N_ATOM_ERROR: Problem while parsing an atom starting with the letter 'n'" },
+    { NUMBER_ERROR, "NUMBER_ERROR: Problem while parsing a number" },
+    { UTF8_ERROR, "UTF8_ERROR: The input is not valid UTF-8" },
+    { UNINITIALIZED, "UNINITIALIZED: Uninitialized" },
+    { EMPTY, "EMPTY: no JSON found" },
+    { UNESCAPED_CHARS, "UNESCAPED_CHARS: Within strings, some characters must be escaped, we found unescaped characters" },
+    { UNCLOSED_STRING, "UNCLOSED_STRING: A string is opened, but never closed." },
+    { UNSUPPORTED_ARCHITECTURE, "UNSUPPORTED_ARCHITECTURE: simdjson does not have an implementation supported by this CPU architecture. Please report this error to the core team as it should never happen." },
+    { INCORRECT_TYPE, "INCORRECT_TYPE: The JSON element does not have the requested type." },
+    { NUMBER_OUT_OF_RANGE, "NUMBER_OUT_OF_RANGE: The JSON number is too large or too small to fit within the requested type." },
+    { INDEX_OUT_OF_BOUNDS, "INDEX_OUT_OF_BOUNDS: Attempted to access an element of a JSON array that is beyond its length." },
+    { NO_SUCH_FIELD, "NO_SUCH_FIELD: The JSON field referenced does not exist in this object." },
+    { IO_ERROR, "IO_ERROR: Error reading the file." },
+    { INVALID_JSON_POINTER, "INVALID_JSON_POINTER: Invalid JSON pointer syntax." },
+    { INVALID_URI_FRAGMENT, "INVALID_URI_FRAGMENT: Invalid URI fragment syntax." },
+    { UNEXPECTED_ERROR, "UNEXPECTED_ERROR: Unexpected error, consider reporting this problem as you may have found a bug in simdjson" },
+    { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use."
}, + { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." }, + { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." }, + { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, + { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, + { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} + }; // error_messages[] + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_ERROR_TABLES_CPP +/* end file internal/error_tables.cpp */ +/* including internal/jsoncharutils_tables.cpp: #include */ +/* begin file internal/jsoncharutils_tables.cpp */ +#ifndef SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP +#define SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP + +/* skipped duplicate #include */ + +namespace simdjson { +namespace internal { + +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; + +SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa, + 0xb, 0xc, 0xd, 0xe, 0xf, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa, 0xb, 0xc, 0xd, 0xe, + 0xf, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x10, 0x20, 0x30, 0x40, 0x50, + 0x60, 0x70, 0x80, 0x90, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa0, + 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, + 0xf0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x100, 0x200, 0x300, 0x400, 0x500, + 0x600, 0x700, 0x800, 0x900, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa00, + 0xb00, 0xc00, 0xd00, 0xe00, 0xf00, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa00, 0xb00, 0xc00, 0xd00, 0xe00, + 0xf00, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, + 0x6000, 0x7000, 0x8000, 0x9000, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xa000, + 0xb000, 0xc000, 0xd000, 0xe000, 0xf000, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xa000, 0xb000, 0xc000, 0xd000, 0xe000, + 0xf000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_SRC_JSONCHARUTILS_TABLES_CPP +/* end file internal/jsoncharutils_tables.cpp */ +/* including internal/numberparsing_tables.cpp: #include */ +/* begin file internal/numberparsing_tables.cpp */ +#ifndef SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP +#define SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP + +/* skipped 
duplicate #include */ +/* including simdjson/internal/numberparsing_tables.h: #include */ +/* begin file simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file simdjson/internal/numberparsing_tables.h */ + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +SIMDJSON_DLLIMPORTEXPORT const double simdjson::internal::power_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. 
+SIMDJSON_DLLIMPORTEXPORT const uint64_t simdjson::internal::power_of_five_128[]= { + 0xeef453d6923bd65a,0x113faa2906a13b3f, + 0x9558b4661b6565f8,0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76,0x5d79bcf00d2df649, + 0xe95a99df8ace6f53,0xf4d82c2c107973dc, + 0x91d8a02bb6c10594,0x79071b9b8a4be869, + 0xb64ec836a47146f9,0x9748e2826cdee284, + 0xe3e27a444d8d98b7,0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72,0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f,0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723,0xad2c788035e61382, + 0x8b16fb203055ac76,0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793,0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78,0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b,0x8672648c40e5ad68, + 0xa9c98d8ccb009506,0x680efdaf511f18c2, + 0xd43bf0effdc0ba48,0x212bd1b2566def2, + 0x84a57695fe98746d,0x14bb630f7604b57, + 0xa5ced43b7e3e9188,0x419ea3bd35385e2d, + 0xcf42894a5dce35ea,0x52064cac828675b9, + 0x818995ce7aa0e1b2,0x7343efebd1940993, + 0xa1ebfb4219491a1f,0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6,0xd41a26e077774ef6, + 0xfd00b897478238d0,0x8920b098955522b4, + 0x9e20735e8cb16382,0x55b46e5f5d5535b0, + 0xc5a890362fddbc62,0xeb2189f734aa831d, + 0xf712b443bbd52b7b,0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d,0x47b233c92125366e, + 0xc1069cd4eabe89f8,0x999ec0bb696e840a, + 0xf148440a256e2c76,0xc00670ea43ca250d, + 0x96cd2a865764dbca,0x380406926a5e5728, + 0xbc807527ed3e12bc,0xc605083704f5ecf2, + 0xeba09271e88d976b,0xf7864a44c633682e, + 0x93445b8731587ea3,0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c,0x5960ea05bad82964, + 0xe61acf033d1a45df,0x6fb92487298e33bd, + 0x8fd0c16206306bab,0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696,0x8f48a4899877186c, + 0xe0b62e2929aba83c,0x331acdabfe94de87, + 0x8c71dcd9ba0b4925,0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f,0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a,0xc9e82cd9f69d6150, + 0x892731ac9faf056e,0xbe311c083a225cd2, + 0xab70fe17c79ac6ca,0x6dbd630a48aaf406, + 0xd64d3d9db981787d,0x92cbbccdad5b108, + 0x85f0468293f0eb4e,0x25bbf56008c58ea5, + 0xa76c582338ed2621,0xaf2af2b80af6f24e, + 0xd1476e2c07286faa,0x1af5af660db4aee1, + 0x82cca4db847945ca,0x50d98d9fc890ed4d, + 0xa37fce126597973c,0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c,0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f,0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1,0x77b191618c54e9ac, + 0xc795830d75038c1d,0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25,0x4b0573286b44ad1d, + 0x9becce62836ac577,0x4ee367f9430aec32, + 0xc2e801fb244576d5,0x229c41f793cda73f, + 0xf3a20279ed56d48a,0x6b43527578c1110f, + 0x9845418c345644d6,0x830a13896b78aaa9, + 0xbe5691ef416bd60c,0x23cc986bc656d553, + 0xedec366b11c6cb8f,0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39,0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07,0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9,0xd1b3400f8f9cff68, + 0x91376c36d99995be,0x23100809b9c21fa1, + 0xb58547448ffffb2d,0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9,0x16c90c8f323f516c, + 0x8dd01fad907ffc3b,0xae3da7d97f6792e3, + 0xb1442798f49ffb4a,0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d,0x40405643d711d583, + 0x8a7d3eef7f1cfc52,0x482835ea666b2572, + 0xad1c8eab5ee43b66,0xda3243650005eecf, + 0xd863b256369d4a40,0x90bed43e40076a82, + 0x873e4f75e2224e68,0x5a7744a6e804a291, + 0xa90de3535aaae202,0x711515d0a205cb36, + 0xd3515c2831559a83,0xd5a5b44ca873e03, + 0x8412d9991ed58091,0xe858790afe9486c2, + 0xa5178fff668ae0b6,0x626e974dbe39a872, + 0xce5d73ff402d98e3,0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e,0x7ce66634bc9d0b99, + 0xa139029f6a239f72,0x1c1fffc1ebc44e80, + 0xc987434744ac874e,0xa327ffb266b56220, + 0xfbe9141915d7a922,0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5,0x6f773fc3603db4a9, + 0xc4ce17b399107c22,0xcb550fb4384d21d3, + 
0xf6019da07f549b2b,0x7e2a53a146606a48, + 0x99c102844f94e0fb,0x2eda7444cbfc426d, + 0xc0314325637a1939,0xfa911155fefb5308, + 0xf03d93eebc589f88,0x793555ab7eba27ca, + 0x96267c7535b763b5,0x4bc1558b2f3458de, + 0xbbb01b9283253ca2,0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb,0x465e15a979c1cadc, + 0x92a1958a7675175f,0xbfacd89ec191ec9, + 0xb749faed14125d36,0xcef980ec671f667b, + 0xe51c79a85916f484,0x82b7e12780e7401a, + 0x8f31cc0937ae58d2,0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07,0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9,0x67a791e093e1d49a, + 0x8bd6a141006042bd,0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d,0x58fae9f773886e18, + 0xda7f5bf590966848,0xaf39a475506a899e, + 0x888f99797a5e012d,0x6d8406c952429603, + 0xaab37fd7d8f58178,0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6,0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26,0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f,0xf42faa48c0ea481e, + 0xd0601d8efc57b08b,0xf13b94daf124da26, + 0x823c12795db6ce57,0x76c53d08d6b70858, + 0xa2cb1717b52481ed,0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268,0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02,0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1,0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a,0x359ab6419ca1091b, + 0xf867241c8cc6d4c0,0xc30163d203c94b62, + 0x9b407691d7fc44f8,0x79e0de63425dcf1d, + 0xc21094364dfb5636,0x985915fc12f542e4, + 0xf294b943e17a2bc4,0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a,0xa705992ceecf9c42, + 0xbd8430bd08277231,0x50c6ff782a838353, + 0xece53cec4a314ebd,0xa4f8bf5635246428, + 0x940f4613ae5ed136,0x871b7795e136be99, + 0xb913179899f68584,0x28e2557b59846e3f, + 0xe757dd7ec07426e5,0x331aeada2fe589cf, + 0x9096ea6f3848984f,0x3ff0d2c85def7621, + 0xb4bca50b065abe63,0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb,0xd3e8495912c62894, + 0x8d3360f09cf6e4bd,0x64712dd7abbbd95c, + 0xb080392cc4349dec,0xbd8d794d96aacfb3, + 0xdca04777f541c567,0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60,0xf41686c49db57244, + 0xac5d37d5b79b6239,0x311c2875c522ced5, + 0xd77485cb25823ac7,0x7d633293366b828b, + 0x86a8d39ef77164bc,0xae5dff9c02033197, + 0xa8530886b54dbdeb,0xd9f57f830283fdfc, + 0xd267caa862a12d66,0xd072df63c324fd7b, + 0x8380dea93da4bc60,0x4247cb9e59f71e6d, + 0xa46116538d0deb78,0x52d9be85f074e608, + 0xcd795be870516656,0x67902e276c921f8b, + 0x806bd9714632dff6,0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3,0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0,0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c,0x796b805720085f81, + 0x9cc3a6eec6311a63,0xcbe3303674053bb0, + 0xc3f490aa77bd60fc,0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b,0xee92fb5515482d44, + 0x991711052d8bf3c5,0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6,0xd262d45a78a0635d, + 0xef340a98172aace4,0x86fb897116c87c34, + 0x9580869f0e7aac0e,0xd45d35e6ae3d4da0, + 0xbae0a846d2195712,0x8974836059cca109, + 0xe998d258869facd7,0x2bd1a438703fc94b, + 0x91ff83775423cc06,0x7b6306a34627ddcf, + 0xb67f6455292cbf08,0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca,0x20caba5f1d9e4a93, + 0x8e938662882af53e,0x547eb47b7282ee9c, + 0xb23867fb2a35b28d,0xe99e619a4f23aa43, + 0xdec681f9f4c31f31,0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e,0xde83bc408dd3dd04, + 0xae0b158b4738705e,0x9624ab50b148d445, + 0xd98ddaee19068c76,0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9,0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc,0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b,0x7647c3200069671f, + 0x84c8d4dfd2c63f3b,0x29ecd9f40041e073, + 0xa5fb0a17c777cf09,0xf468107100525890, + 0xcf79cc9db955c2cc,0x7182148d4066eeb4, + 0x81ac1fe293d599bf,0xc6f14cd848405530, + 0xa21727db38cb002f,0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b,0xa6d90811f0e4851c, + 0xfd442e4688bd304a,0x908f4a166d1da663, + 0x9e4a9cec15763e2e,0x9a598e4e043287fe, + 
0xc5dd44271ad3cdba,0x40eff1e1853f29fd, + 0xf7549530e188c128,0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9,0x82bb74f8301958ce, + 0xc13a148e3032d6e7,0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1,0xdc44e6c3cb279ac1, + 0x96f5600f15a7b7e5,0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de,0x7415d448f6b6f0e7, + 0xebdf661791d60f56,0x111b495b3464ad21, + 0x936b9fcebb25c995,0xcab10dd900beec34, + 0xb84687c269ef3bfb,0x3d5d514f40eea742, + 0xe65829b3046b0afa,0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc,0x47f0e785eaba72ab, + 0xb3f4e093db73a093,0x59ed216765690f56, + 0xe0f218b8d25088b8,0x306869c13ec3532c, + 0x8c974f7383725573,0x1e414218c73a13fb, + 0xafbd2350644eeacf,0xe5d1929ef90898fa, + 0xdbac6c247d62a583,0xdf45f746b74abf39, + 0x894bc396ce5da772,0x6b8bba8c328eb783, + 0xab9eb47c81f5114f,0x66ea92f3f326564, + 0xd686619ba27255a2,0xc80a537b0efefebd, + 0x8613fd0145877585,0xbd06742ce95f5f36, + 0xa798fc4196e952e7,0x2c48113823b73704, + 0xd17f3b51fca3a7a0,0xf75a15862ca504c5, + 0x82ef85133de648c4,0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5,0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3,0x318df905079926a8, + 0xffbbcfe994e5c61f,0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3,0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8,0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b,0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d,0x6bea10ca65c084e, + 0xc31bfa0fe5698db8,0x486e494fcff30a62, + 0xf3e2f893dec3f126,0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7,0xf89629465a75e01c, + 0xbe89523386091465,0xf6bbb397f1135823, + 0xee2ba6c0678b597f,0x746aa07ded582e2c, + 0x94db483840b717ef,0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb,0x92f34d62616ce413, + 0xe896a0d7e51e1566,0x77b020baf9c81d17, + 0x915e2486ef32cd60,0xace1474dc1d122e, + 0xb5b5ada8aaff80b8,0xd819992132456ba, + 0xe3231912d5bf60e6,0x10e1fff697ed6c69, + 0x8df5efabc5979c8f,0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3,0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0,0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4,0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d,0x86c16c98d2c953c6, + 0xd89d64d57a607744,0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b,0x11471cd764ad4972, + 0xa93af6c6c79b5d2d,0xd598e40d3dd89bcf, + 0xd389b47879823479,0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb,0xcedf722a585139ba, + 0xa54394fe1eedb8fe,0xc2974eb4ee658828, + 0xce947a3da6a9273e,0x733d226229feea32, + 0x811ccc668829b887,0x806357d5a3f525f, + 0xa163ff802a3426a8,0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052,0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67,0xbbac2078d443ace2, + 0x9d9ba7832936edc0,0xd54b944b84aa4c0d, + 0xc5029163f384a931,0xa9e795e65d4df11, + 0xf64335bcf065d37d,0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e,0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39,0xe45ec2862f71e1d6, + 0xf07da27a82c37088,0x5d767327bb4e5a4c, + 0x964e858c91ba2655,0x3a6a07f8d510f86f, + 0xbbe226efb628afea,0x890489f70a55368b, + 0xeadab0aba3b2dbe5,0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f,0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb,0x9ce6ebb40173744, + 0xe55990879ddcaabd,0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6,0x9fa946824a12232d, + 0xb32df8e9f3546564,0x47939822dc96abf9, + 0xdff9772470297ebd,0x59787e2b93bc56f7, + 0x8bfbea76c619ef36,0x57eb4edb3c55b65a, + 0xaefae51477a06b03,0xede622920b6b23f1, + 0xdab99e59958885c4,0xe95fab368e45eced, + 0x88b402f7fd75539b,0x11dbcb0218ebb414, + 0xaae103b5fcd2a881,0xd652bdc29f26a119, + 0xd59944a37c0752a2,0x4be76d3346f0495f, + 0x857fcae62d8493a5,0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e,0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2,0x7e2000a41346a7a7, + 0x825ecc24c873782f,0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b,0x728900802f0f32fa, + 0xcbb41ef979346bca,0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc,0xe2f610c84987bfa8, + 
0x9f24b832e6b0f436,0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143,0x91503d1c79720dbb, + 0xf8a95fcf88747d94,0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c,0xc986afbe3ee11aba, + 0xc24452da229b021b,0xfbe85badce996168, + 0xf2d56790ab41c2a2,0xfae27299423fb9c3, + 0x97c560ba6b0919a5,0xdccd879fc967d41a, + 0xbdb6b8e905cb600f,0x5400e987bbc1c920, + 0xed246723473e3813,0x290123e9aab23b68, + 0x9436c0760c86e30b,0xf9a0b6720aaf6521, + 0xb94470938fa89bce,0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2,0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9,0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828,0x1e38aeb6360b1af3, + 0xe2280b6c20dd5232,0x25c6da63c38de1b0, + 0x8d590723948a535f,0x579c487e5a38ad0e, + 0xb0af48ec79ace837,0x2d835a9df0c6d851, + 0xdcdb1b2798182244,0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b,0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5,0xe272467e3d222f3f, + 0xd7adf884aa879177,0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea,0x98e947129fc2b4e9, + 0xa87fea27a539e9a5,0x3f2398d747b36224, + 0xd29fe4b18e88640e,0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89,0x1953cf68300424ac, + 0xa48ceaaab75a8e2b,0x5fa8c3423c052dd7, + 0xcdb02555653131b6,0x3792f412cb06794d, + 0x808e17555f3ebf11,0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6,0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b,0xf245825a5a445275, + 0xfb158592be068d2e,0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d,0x55464dd69685606b, + 0xc428d05aa4751e4c,0xaa97e14c3c26b886, + 0xf53304714d9265df,0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab,0xe546a8038efe4029, + 0xbf8fdb78849a5f96,0xde98520472bdd033, + 0xef73d256a5c0f77c,0x963e66858f6d4440, + 0x95a8637627989aad,0xdde7001379a44aa8, + 0xbb127c53b17ec159,0x5560c018580d5d52, + 0xe9d71b689dde71af,0xaab8f01e6e10b4a6, + 0x9226712162ab070d,0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1,0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05,0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3,0x77f3608e92adb242, + 0xb267ed1940f1c61c,0x55f038b237591ed3, + 0xdf01e85f912e37a3,0x6b6c46dec52f6688, + 0x8b61313bbabce2c6,0x2323ac4b3b3da015, + 0xae397d8aa96c1b77,0xabec975e0a0d081a, + 0xd9c7dced53c72255,0x96e7bd358c904a21, + 0x881cea14545c7575,0x7e50d64177da2e54, + 0xaa242499697392d2,0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787,0x955e4ec64b44e864, + 0x84ec3c97da624ab4,0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61,0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba,0x67de18eda5814af2, + 0x81ceb32c4b43fcf4,0x80eacf948770ced7, + 0xa2425ff75e14fc31,0xa1258379a94d028d, + 0xcad2f7f5359a3b3e,0x96ee45813a04330, + 0xfd87b5f28300ca0d,0x8bca9d6e188853fc, + 0x9e74d1b791e07e48,0x775ea264cf55347e, + 0xc612062576589dda,0x95364afe032a81a0, + 0xf79687aed3eec551,0x3a83ddbd83f52210, + 0x9abe14cd44753b52,0xc4926a9672793580, + 0xc16d9a0095928a27,0x75b7053c0f178400, + 0xf1c90080baf72cb1,0x5324c68b12dd6800, + 0x971da05074da7bee,0xd3f6fc16ebca8000, + 0xbce5086492111aea,0x88f4bb1ca6bd0000, + 0xec1e4a7db69561a5,0x2b31e9e3d0700000, + 0x9392ee8e921d5d07,0x3aff322e62600000, + 0xb877aa3236a4b449,0x9befeb9fad487c3, + 0xe69594bec44de15b,0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9,0xf9d37014bf60a11, + 0xb424dc35095cd80f,0x538484c19ef38c95, + 0xe12e13424bb40e13,0x2865a5f206b06fba, + 0x8cbccc096f5088cb,0xf93f87b7442e45d4, + 0xafebff0bcb24aafe,0xf78f69a51539d749, + 0xdbe6fecebdedd5be,0xb573440e5a884d1c, + 0x89705f4136b4a597,0x31680a88f8953031, + 0xabcc77118461cefc,0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc,0x3d32907604691b4d, + 0x8637bd05af6c69b5,0xa63f9a49c2c1b110, + 0xa7c5ac471b478423,0xfcf80dc33721d54, + 0xd1b71758e219652b,0xd3c36113404ea4a9, + 0x83126e978d4fdf3b,0x645a1cac083126ea, + 0xa3d70a3d70a3d70a,0x3d70a3d70a3d70a4, + 0xcccccccccccccccc,0xcccccccccccccccd, + 0x8000000000000000,0x0, + 
0xa000000000000000,0x0, + 0xc800000000000000,0x0, + 0xfa00000000000000,0x0, + 0x9c40000000000000,0x0, + 0xc350000000000000,0x0, + 0xf424000000000000,0x0, + 0x9896800000000000,0x0, + 0xbebc200000000000,0x0, + 0xee6b280000000000,0x0, + 0x9502f90000000000,0x0, + 0xba43b74000000000,0x0, + 0xe8d4a51000000000,0x0, + 0x9184e72a00000000,0x0, + 0xb5e620f480000000,0x0, + 0xe35fa931a0000000,0x0, + 0x8e1bc9bf04000000,0x0, + 0xb1a2bc2ec5000000,0x0, + 0xde0b6b3a76400000,0x0, + 0x8ac7230489e80000,0x0, + 0xad78ebc5ac620000,0x0, + 0xd8d726b7177a8000,0x0, + 0x878678326eac9000,0x0, + 0xa968163f0a57b400,0x0, + 0xd3c21bcecceda100,0x0, + 0x84595161401484a0,0x0, + 0xa56fa5b99019a5c8,0x0, + 0xcecb8f27f4200f3a,0x0, + 0x813f3978f8940984,0x4000000000000000, + 0xa18f07d736b90be5,0x5000000000000000, + 0xc9f2c9cd04674ede,0xa400000000000000, + 0xfc6f7c4045812296,0x4d00000000000000, + 0x9dc5ada82b70b59d,0xf020000000000000, + 0xc5371912364ce305,0x6c28000000000000, + 0xf684df56c3e01bc6,0xc732000000000000, + 0x9a130b963a6c115c,0x3c7f400000000000, + 0xc097ce7bc90715b3,0x4b9f100000000000, + 0xf0bdc21abb48db20,0x1e86d40000000000, + 0x96769950b50d88f4,0x1314448000000000, + 0xbc143fa4e250eb31,0x17d955a000000000, + 0xeb194f8e1ae525fd,0x5dcfab0800000000, + 0x92efd1b8d0cf37be,0x5aa1cae500000000, + 0xb7abc627050305ad,0xf14a3d9e40000000, + 0xe596b7b0c643c719,0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f,0xe4820023a2000000, + 0xb35dbf821ae4f38b,0xdda2802c8a800000, + 0xe0352f62a19e306e,0xd50b2037ad200000, + 0x8c213d9da502de45,0x4526f422cc340000, + 0xaf298d050e4395d6,0x9670b12b7f410000, + 0xdaf3f04651d47b4c,0x3c0cdd765f114000, + 0x88d8762bf324cd0f,0xa5880a69fb6ac800, + 0xab0e93b6efee0053,0x8eea0d047a457a00, + 0xd5d238a4abe98068,0x72a4904598d6d880, + 0x85a36366eb71f041,0x47a6da2b7f864750, + 0xa70c3c40a64e6c51,0x999090b65f67d924, + 0xd0cf4b50cfe20765,0xfff4b4e3f741cf6d, + 0x82818f1281ed449f,0xbff8f10e7a8921a4, + 0xa321f2d7226895c7,0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39,0x9bf4f8a69f764490, + 0xfee50b7025c36a08,0x2f236d04753d5b4, + 0x9f4f2726179a2245,0x1d762422c946590, + 0xc722f0ef9d80aad6,0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b,0xd2e0898765a7deb2, + 0x9b934c3b330c8577,0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5,0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a,0x8bef464e3945ef7a, + 0x97edd871cfda3a56,0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec,0x3d52eeed1cbea317, + 0xed63a231d4c4fb27,0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8,0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436,0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44,0x60dbbca87196b616, + 0x90e40fbeea1d3a4a,0xbc8955e946fe31cd, + 0xb51d13aea4a488dd,0x6babab6398bdbe41, + 0xe264589a4dcdab14,0xc696963c7eed2dd1, + 0x8d7eb76070a08aec,0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8,0x3b25a55f43294bcb, + 0xdd15fe86affad912,0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab,0x6e3569326c784337, + 0xacb92ed9397bf996,0x49c2c37f07965404, + 0xd7e77a8f87daf7fb,0xdc33745ec97be906, + 0x86f0ac99b4e8dafd,0x69a028bb3ded71a3, + 0xa8acd7c0222311bc,0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b,0xf50a3fa490c30190, + 0x83c7088e1aab65db,0x792667c6da79e0fa, + 0xa4b8cab1a1563f52,0x577001b891185938, + 0xcde6fd5e09abcf26,0xed4c0226b55e6f86, + 0x80b05e5ac60b6178,0x544f8158315b05b4, + 0xa0dc75f1778e39d6,0x696361ae3db1c721, + 0xc913936dd571c84c,0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f,0x4ab48a04065c723, + 0x9d174b2dcec0e47b,0x62eb0d64283f9c76, + 0xc45d1df942711d9a,0x3ba5d0bd324f8394, + 0xf5746577930d6500,0xca8f44ec7ee36479, + 0x9968bf6abbe85f20,0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8,0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2,0xc5cfe94ef3ea101e, + 
0x95d04aee3b80ece5,0xbba1f1d158724a12, + 0xbb445da9ca61281f,0x2a8a6e45ae8edc97, + 0xea1575143cf97226,0xf52d09d71a3293bd, + 0x924d692ca61be758,0x593c2626705f9c56, + 0xb6e0c377cfa2e12e,0x6f8b2fb00c77836c, + 0xe498f455c38b997a,0xb6dfb9c0f956447, + 0x8edf98b59a373fec,0x4724bd4189bd5eac, + 0xb2977ee300c50fe7,0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1,0x2f2967b66737e3ed, + 0x8b865b215899f46c,0xbd79e0d20082ee74, + 0xae67f1e9aec07187,0xecd8590680a3aa11, + 0xda01ee641a708de9,0xe80e6f4820cc9495, + 0x884134fe908658b2,0x3109058d147fdcdd, + 0xaa51823e34a7eede,0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96,0x6c9e18ac7007c91a, + 0x850fadc09923329e,0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45,0x84db8346b786151c, + 0xcfe87f7cef46ff16,0xe612641865679a63, + 0x81f14fae158c5f6e,0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749,0xe3be5e330f38f09d, + 0xcb090c8001ab551c,0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63,0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e,0x2867e7fddcdd9afa, + 0xc646d63501a1511d,0xb281e1fd541501b8, + 0xf7d88bc24209a565,0x1f225a7ca91a4226, + 0x9ae757596946075f,0x3375788de9b06958, + 0xc1a12d2fc3978937,0x52d6b1641c83ae, + 0xf209787bb47d6b84,0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332,0xf840b7ba963646e0, + 0xbd176620a501fbff,0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff,0xa3e51f138ab4cebe, + 0x93ba47c980e98cdf,0xc66f336c36b10137, + 0xb8a8d9bbe123f017,0xb80b0047445d4184, + 0xe6d3102ad96cec1d,0xa60dc059157491e5, + 0x9043ea1ac7e41392,0x87c89837ad68db2f, + 0xb454e4a179dd1877,0x29babe4598c311fb, + 0xe16a1dc9d8545e94,0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d,0x1899e4a65f58660c, + 0xb01ae745b101e9e4,0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d,0x76707543f4fa1f73, + 0x899504ae72497eba,0x6a06494a791c53a8, + 0xabfa45da0edbde69,0x487db9d17636892, + 0xd6f8d7509292d603,0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2,0xb8a2392ba45a9b2, + 0xa7f26836f282b732,0x8e6cac7768d7141e, + 0xd1ef0244af2364ff,0x3207d795430cd926, + 0x8335616aed761f1f,0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7,0x5f16206c9c6209a6, + 0xcd036837130890a1,0x36dba887c37a8c0f, + 0x802221226be55a64,0xc2494954da2c9789, + 0xa02aa96b06deb0fd,0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d,0x6f92829494e5acc7, + 0xfa42a8b73abbf48c,0xcb772339ba1f17f9, + 0x9c69a97284b578d7,0xff2a760414536efb, + 0xc38413cf25e2d70d,0xfef5138519684aba, + 0xf46518c2ef5b8cd1,0x7eb258665fc25d69, + 0x98bf2f79d5993802,0xef2f773ffbd97a61, + 0xbeeefb584aff8603,0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784,0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2,0xdd945a747bf26183, + 0xba756174393d88df,0x94f971119aeef9e4, + 0xe912b9d1478ceb17,0x7a37cd5601aab85d, + 0x91abb422ccb812ee,0xac62e055c10ab33a, + 0xb616a12b7fe617aa,0x577b986b314d6009, + 0xe39c49765fdf9d94,0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d,0x14588f13be847307, + 0xb1d219647ae6b31c,0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3,0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee,0x25de7bb9480d5854, + 0xada72ccc20054ae9,0xaf561aa79a10ae6a, + 0xd910f7ff28069da4,0x1b2ba1518094da04, + 0x87aa9aff79042286,0x90fb44d2f05d0842, + 0xa99541bf57452b28,0x353a1607ac744a53, + 0xd3fa922f2d1675f2,0x42889b8997915ce8, + 0x847c9b5d7c2e09b7,0x69956135febada11, + 0xa59bc234db398c25,0x43fab9837e699095, + 0xcf02b2c21207ef2e,0x94f967e45e03f4bb, + 0x8161afb94b44f57d,0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc,0x6462d92a69731732, + 0xca28a291859bbf93,0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78,0x5cda735244c3d43e, + 0x9defbf01b061adab,0x3a0888136afa64a7, + 0xc56baec21c7a1916,0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b,0x8aad549e57273d45, + 0x9a3c2087a63f6399,0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f,0x84576a1bb416a7dd, + 
0xf0fdf2d3f3c30b9f,0x656d44a2a11c51d5, + 0x969eb7c47859e743,0x9f644ae5a4b1b325, + 0xbc4665b596706114,0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959,0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8,0x9a7f12442d588f2, + 0xb7dcbf5354e9bece,0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81,0x8f1668c8a86da5fa, + 0x8fa475791a569d10,0xf96e017d694487bc, + 0xb38d92d760ec4455,0x37c981dcc395a9ac, + 0xe070f78d3927556a,0x85bbe253f47b1417, + 0x8c469ab843b89562,0x93956d7478ccec8e, + 0xaf58416654a6babb,0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a,0x6997b05fcc0319e, + 0x88fcf317f22241e2,0x441fece3bdf81f03, + 0xab3c2fddeeaad25a,0xd527e81cad7626c3, + 0xd60b3bd56a5586f1,0x8a71e223d8d3b074, + 0x85c7056562757456,0xf6872d5667844e49, + 0xa738c6bebb12d16c,0xb428f8ac016561db, + 0xd106f86e69d785c7,0xe13336d701beba52, + 0x82a45b450226b39c,0xecc0024661173473, + 0xa34d721642b06084,0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5,0x31ec038df7b441f4, + 0xff290242c83396ce,0x7e67047175a15271, + 0x9f79a169bd203e41,0xf0062c6e984d386, + 0xc75809c42c684dd1,0x52c07b78a3e60868, + 0xf92e0c3537826145,0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb,0x88a66076400bb691, + 0xc2abf989935ddbfe,0x6acff893d00ea435, + 0xf356f7ebf83552fe,0x583f6b8c4124d43, + 0x98165af37b2153de,0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6,0x744f18c0592e4c5c, + 0xeda2ee1c7064130c,0x1162def06f79df73, + 0x9485d4d1c63e8be7,0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1,0x6d953e2bd7173692, + 0xe8111c87c5c1ba99,0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0,0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8,0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a,0x2e44ae64840fd61d, + 0x8da471a9de737e24,0x5ceaecfed289e5d2, + 0xb10d8e1456105dad,0x7425a83e872c5f47, + 0xdd50f1996b947518,0xd12f124e28f77719, + 0x8a5296ffe33cc92f,0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b,0x636cc64d1001550b, + 0xd8210befd30efa5a,0x3c47f7e05401aa4e, + 0x8714a775e3e95c78,0x65acfaec34810a71, + 0xa8d9d1535ce3b396,0x7f1839a741a14d0d, + 0xd31045a8341ca07c,0x1ede48111209a050, + 0x83ea2b892091e44d,0x934aed0aab460432, + 0xa4e4b66b68b65d60,0xf81da84d5617853f, + 0xce1de40642e3f4b9,0x36251260ab9d668e, + 0x80d2ae83e9ce78f3,0xc1d72b7c6b426019, + 0xa1075a24e4421730,0xb24cf65b8612f81f, + 0xc94930ae1d529cfc,0xdee033f26797b627, + 0xfb9b7cd9a4a7443c,0x169840ef017da3b1, + 0x9d412e0806e88aa5,0x8e1f289560ee864e, + 0xc491798a08a2ad4e,0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2,0xae10af696774b1db, + 0x9991a6f3d6bf1765,0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f,0x17fd090a58d32af3, + 0xeff394dcff8a948e,0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9,0x4abdaf101564f98e, + 0xbb764c4ca7a4440f,0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513,0x84c86189216dc5ed, + 0x92746b9be2f8552c,0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77,0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515,0xfabaf3feaa5334a, + 0x8f05b1163ba6832d,0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8,0x743e20e9ef511012, + 0xdf78e4b2bd342cf6,0x914da9246b255416, + 0x8bab8eefb6409c1a,0x1ad089b6c2f7548e, + 0xae9672aba3d0c320,0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8,0xc9e5d72d90a2741e, + 0x8865899617fb1871,0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d,0xddbb901b98feeab7, + 0xd51ea6fa85785631,0x552a74227f3ea565, + 0x8533285c936b35de,0xd53a88958f87275f, + 0xa67ff273b8460356,0x8a892abaf368f137, + 0xd01fef10a657842c,0x2d2b7569b0432d85, + 0x8213f56a67f6b29b,0x9c3b29620e29fc73, + 0xa298f2c501f45f42,0x8349f3ba91b47b8f, + 0xcb3f2f7642717713,0x241c70a936219a73, + 0xfe0efb53d30dd4d7,0xed238cd383aa0110, + 0x9ec95d1463e8a506,0xf4363804324a40aa, + 0xc67bb4597ce2ce48,0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da,0xdd94b7868e94050a, + 0x9b10a4e5e9913128,0xca7cf2b4191c8326, + 
0xc1d4ce1f63f57d72,0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf,0xbc633b39673c8cec, + 0x976e41088617ca01,0xd5be0503e085d813, + 0xbd49d14aa79dbc82,0x4b2d8644d8a74e18, + 0xec9c459d51852ba2,0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45,0xcabb90e5c942b503, + 0xb8da1662e7b00a17,0x3d6a751f3b936243, + 0xe7109bfba19c0c9d,0xcc512670a783ad4, + 0x906a617d450187e2,0x27fb2b80668b24c5, + 0xb484f9dc9641e9da,0xb1f9f660802dedf6, + 0xe1a63853bbd26451,0x5e7873f8a0396973, + 0x8d07e33455637eb2,0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f,0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7,0x7641a140cc7810fb, + 0x89b9b3e11b6329ba,0xa9e904c87fcb0a9d, + 0xac2820d9623bf429,0x546345fa9fbdcd44, + 0xd732290fbacaf133,0xa97c177947ad4095, + 0x867f59a9d4bed6c0,0x49ed8eabcccc485d, + 0xa81f301449ee8c70,0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c,0x73832eec6fff3111, + 0x83585d8fd9c25db7,0xc831fd53c5ff7eab, + 0xa42e74f3d032f525,0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f,0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85,0x7980d163cf5b81b3, + 0xa0555e361951c366,0xd7e105bcc332621f, + 0xc86ab5c39fa63440,0x8dd9472bf3fefaa7, + 0xfa856334878fc150,0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2,0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07,0xa862f80ec4700c8, + 0xf4a642e14c6262c8,0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd,0x8038d51cb897789c, + 0xbf21e44003acdd2c,0xe0470a63e6bd56c3, + 0xeeea5d5004981478,0x1858ccfce06cac74, + 0x95527a5202df0ccb,0xf37801e0c43ebc8, + 0xbaa718e68396cffd,0xd30560258f54e6ba, + 0xe950df20247c83fd,0x47c6b82ef32a2069, + 0x91d28b7416cdd27e,0x4cdc331d57fa5441, + 0xb6472e511c81471d,0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5,0x58180fddd97723a6, + 0x8e679c2f5e44ff8f,0x570f09eaa7ea7648,}; + +#endif // SIMDJSON_SRC_NUMBERPARSING_TABLES_CPP +/* end file internal/numberparsing_tables.cpp */ +/* including internal/simdprune_tables.cpp: #include */ +/* begin file internal/simdprune_tables.cpp */ +#ifndef SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP +#define SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP + +/* including simdjson/implementation_detection.h: #include */ +/* begin file simdjson/implementation_detection.h */ +#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H + +/* skipped duplicate #include "simdjson/base.h" */ + +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. +#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +// +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. 
+#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. +#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#endif + +// Default Westmere to on if this is x86-64. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 +// if anything at all except fallback can always run, then disable fallback. 
+#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." +#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H +/* end file simdjson/implementation_detection.h */ + +#if SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable +SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256] = { + 0, 2, 2, 4, 2, 4, 4, 6, 2, 4, 4, 6, 4, 6, 6, 8, 2, 4, 4, + 6, 4, 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 2, 4, 4, 6, 4, 6, + 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, + 8, 8, 10, 8, 10, 10, 12, 2, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, + 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, 8, + 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 2, 4, 4, 6, 4, + 6, 6, 8, 4, 6, 6, 8, 6, 8, 8, 10, 4, 6, 6, 8, 6, 8, 8, 10, + 6, 8, 8, 10, 8, 10, 10, 12, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, + 10, 8, 10, 10, 12, 6, 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, + 12, 14, 4, 6, 6, 8, 6, 8, 8, 10, 6, 8, 8, 10, 8, 10, 10, 12, 6, + 8, 8, 10, 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 6, 8, 8, 10, + 8, 10, 10, 12, 8, 10, 10, 12, 10, 12, 12, 14, 8, 10, 10, 12, 10, 12, 12, + 14, 10, 12, 12, 14, 12, 14, 14, 16}; + +SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x08, + 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, + 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x08, 0x09, 0x0a, 0x0b, + 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, +}; + +// 256 * 8 bytes = 2kB, easily fits in cache. +SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256] = { + 0x0706050403020100, 0x0007060504030201, 0x0007060504030200, + 0x0000070605040302, 0x0007060504030100, 0x0000070605040301, + 0x0000070605040300, 0x0000000706050403, 0x0007060504020100, + 0x0000070605040201, 0x0000070605040200, 0x0000000706050402, + 0x0000070605040100, 0x0000000706050401, 0x0000000706050400, + 0x0000000007060504, 0x0007060503020100, 0x0000070605030201, + 0x0000070605030200, 0x0000000706050302, 0x0000070605030100, + 0x0000000706050301, 0x0000000706050300, 0x0000000007060503, + 0x0000070605020100, 0x0000000706050201, 0x0000000706050200, + 0x0000000007060502, 0x0000000706050100, 0x0000000007060501, + 0x0000000007060500, 0x0000000000070605, 0x0007060403020100, + 0x0000070604030201, 0x0000070604030200, 0x0000000706040302, + 0x0000070604030100, 0x0000000706040301, 0x0000000706040300, + 0x0000000007060403, 0x0000070604020100, 0x0000000706040201, + 0x0000000706040200, 0x0000000007060402, 0x0000000706040100, + 0x0000000007060401, 0x0000000007060400, 0x0000000000070604, + 0x0000070603020100, 0x0000000706030201, 0x0000000706030200, + 0x0000000007060302, 0x0000000706030100, 0x0000000007060301, + 0x0000000007060300, 0x0000000000070603, 0x0000000706020100, + 0x0000000007060201, 0x0000000007060200, 0x0000000000070602, + 0x0000000007060100, 0x0000000000070601, 0x0000000000070600, + 0x0000000000000706, 0x0007050403020100, 0x0000070504030201, + 0x0000070504030200, 0x0000000705040302, 0x0000070504030100, + 0x0000000705040301, 0x0000000705040300, 0x0000000007050403, + 0x0000070504020100, 0x0000000705040201, 0x0000000705040200, + 0x0000000007050402, 0x0000000705040100, 0x0000000007050401, + 0x0000000007050400, 0x0000000000070504, 0x0000070503020100, + 0x0000000705030201, 0x0000000705030200, 0x0000000007050302, + 0x0000000705030100, 0x0000000007050301, 0x0000000007050300, + 0x0000000000070503, 0x0000000705020100, 0x0000000007050201, + 0x0000000007050200, 0x0000000000070502, 0x0000000007050100, + 0x0000000000070501, 0x0000000000070500, 0x0000000000000705, + 0x0000070403020100, 0x0000000704030201, 0x0000000704030200, + 0x0000000007040302, 0x0000000704030100, 0x0000000007040301, + 0x0000000007040300, 0x0000000000070403, 0x0000000704020100, + 0x0000000007040201, 0x0000000007040200, 0x0000000000070402, + 0x0000000007040100, 0x0000000000070401, 0x0000000000070400, + 0x0000000000000704, 0x0000000703020100, 0x0000000007030201, + 0x0000000007030200, 0x0000000000070302, 0x0000000007030100, + 0x0000000000070301, 0x0000000000070300, 0x0000000000000703, + 0x0000000007020100, 0x0000000000070201, 0x0000000000070200, + 0x0000000000000702, 0x0000000000070100, 0x0000000000000701, + 0x0000000000000700, 0x0000000000000007, 0x0006050403020100, + 0x0000060504030201, 0x0000060504030200, 0x0000000605040302, + 0x0000060504030100, 0x0000000605040301, 0x0000000605040300, + 0x0000000006050403, 0x0000060504020100, 0x0000000605040201, + 0x0000000605040200, 0x0000000006050402, 0x0000000605040100, + 0x0000000006050401, 0x0000000006050400, 0x0000000000060504, + 0x0000060503020100, 0x0000000605030201, 0x0000000605030200, + 0x0000000006050302, 0x0000000605030100, 0x0000000006050301, + 0x0000000006050300, 0x0000000000060503, 0x0000000605020100, + 0x0000000006050201, 0x0000000006050200, 0x0000000000060502, + 0x0000000006050100, 0x0000000000060501, 0x0000000000060500, + 0x0000000000000605, 0x0000060403020100, 0x0000000604030201, + 0x0000000604030200, 0x0000000006040302, 0x0000000604030100, + 
0x0000000006040301, 0x0000000006040300, 0x0000000000060403, + 0x0000000604020100, 0x0000000006040201, 0x0000000006040200, + 0x0000000000060402, 0x0000000006040100, 0x0000000000060401, + 0x0000000000060400, 0x0000000000000604, 0x0000000603020100, + 0x0000000006030201, 0x0000000006030200, 0x0000000000060302, + 0x0000000006030100, 0x0000000000060301, 0x0000000000060300, + 0x0000000000000603, 0x0000000006020100, 0x0000000000060201, + 0x0000000000060200, 0x0000000000000602, 0x0000000000060100, + 0x0000000000000601, 0x0000000000000600, 0x0000000000000006, + 0x0000050403020100, 0x0000000504030201, 0x0000000504030200, + 0x0000000005040302, 0x0000000504030100, 0x0000000005040301, + 0x0000000005040300, 0x0000000000050403, 0x0000000504020100, + 0x0000000005040201, 0x0000000005040200, 0x0000000000050402, + 0x0000000005040100, 0x0000000000050401, 0x0000000000050400, + 0x0000000000000504, 0x0000000503020100, 0x0000000005030201, + 0x0000000005030200, 0x0000000000050302, 0x0000000005030100, + 0x0000000000050301, 0x0000000000050300, 0x0000000000000503, + 0x0000000005020100, 0x0000000000050201, 0x0000000000050200, + 0x0000000000000502, 0x0000000000050100, 0x0000000000000501, + 0x0000000000000500, 0x0000000000000005, 0x0000000403020100, + 0x0000000004030201, 0x0000000004030200, 0x0000000000040302, + 0x0000000004030100, 0x0000000000040301, 0x0000000000040300, + 0x0000000000000403, 0x0000000004020100, 0x0000000000040201, + 0x0000000000040200, 0x0000000000000402, 0x0000000000040100, + 0x0000000000000401, 0x0000000000000400, 0x0000000000000004, + 0x0000000003020100, 0x0000000000030201, 0x0000000000030200, + 0x0000000000000302, 0x0000000000030100, 0x0000000000000301, + 0x0000000000000300, 0x0000000000000003, 0x0000000000020100, + 0x0000000000000201, 0x0000000000000200, 0x0000000000000002, + 0x0000000000000100, 0x0000000000000001, 0x0000000000000000, + 0x0000000000000000, +}; //static uint64_t thintable_epi8[256] + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_ARM64 || SIMDJSON_IMPLEMENTATION_ICELAKE || SIMDJSON_IMPLEMENTATION_HASWELL || SIMDJSON_IMPLEMENTATION_WESTMERE || SIMDJSON_IMPLEMENTATION_PPC64 + +#endif // SIMDJSON_SRC_SIMDPRUNE_TABLES_CPP +/* end file internal/simdprune_tables.cpp */ + +/* including simdjson/generic/dependencies.h: #include */ +/* begin file simdjson/generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H + +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. 
+/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/implementation.h: #include "simdjson/implementation.h" */ +/* begin file simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* including simdjson/internal/atomic_ptr.h: #include "simdjson/internal/atomic_ptr.h" */ +/* begin file simdjson/internal/atomic_ptr.h */ +#ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H +#define SIMDJSON_INTERNAL_ATOMIC_PTR_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include + +namespace simdjson { +namespace internal { + +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H +/* end file simdjson/internal/atomic_ptr.h */ +/* including simdjson/internal/dom_parser_implementation.h: #include "simdjson/internal/dom_parser_implementation.h" */ +/* begin file simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. + return (mode != stage1_mode::regular); + // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final); +} + + +namespace internal { + + +/** + * An implementation of simdjson's DOM parser for a particular CPU architecture. + * + * This class is expected to be accessed only by pointer, and never move in memory (though the + * pointer can move). + */ +class dom_parser_implementation { +public: + + /** + * @private For internal implementation use + * + * Run a full JSON parse on a single document (stage1 + stage2). + * + * Guaranteed only to be called when capacity > document length. + * + * Overridden by each implementation. + * + * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param len The length of the json document. + * @return The error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0; + + /** + * @private For internal implementation use + * + * Stage 1 of the document parser. 
+   *
+   * Guaranteed only to be called when capacity > document length.
+   *
+   * Overridden by each implementation.
+   *
+   * @param buf The json document to parse.
+   * @param len The length of the json document.
+   * @param streaming Whether this is being called by parser::parse_many.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0;
+
+  /**
+   * @private For internal implementation use
+   *
+   * Stage 2 of the document parser.
+   *
+   * Called after stage1().
+   *
+   * Overridden by each implementation.
+   *
+   * @param doc The document to output to.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0;
+
+  /**
+   * @private For internal implementation use
+   *
+   * Stage 2 of the document parser for parser::parse_many.
+   *
+   * Guaranteed only to be called after stage1().
+   * Overridden by each implementation.
+   *
+   * @param doc The document to output to.
+   * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed.
+   */
+  simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0;
+
+  /**
+   * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
+   * must be an unescaped quote terminating the string. It returns the final output
+   * position as a pointer. In case of error (e.g., the string has bad escaped codes),
+   * then nullptr is returned. It is assumed that the output buffer is large
+   * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
+   * SIMDJSON_PADDING bytes.
+   *
+   * Overridden by each implementation.
+   *
+   * @param str pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote.
+   * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size.
+   * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs.
+   * @return end of the written region (exclusive) or nullptr in case of error.
+   */
+  simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0;
+
+  /**
+   * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
+   * must be an unescaped quote terminating the string. It returns the final output
+   * position as a pointer. In case of error (e.g., the string has bad escaped codes),
+   * then nullptr is returned. It is assumed that the output buffer is large
+   * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
+   * SIMDJSON_PADDING bytes.
+   *
+   * Overridden by each implementation.
+   *
+   * @param str pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote.
+   * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size.
+   * @return end of the written region (exclusive) or nullptr in case of error.
+   */
+  simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0;
+
+  /**
+   * Change the capacity of this parser.
+   *
+   * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB)
+   * and a CAPACITY error is returned if it is attempted.
+   *
+   * Generally used for reallocation.
+   *
+   * @param capacity The new capacity.
+ * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_capacity(size_t capacity) noexcept = 0; + + /** + * Change the max depth of this parser. + * + * Generally used for reallocation. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. + * @return The error code, or SUCCESS if there was no error. + */ + virtual error_code set_max_depth(size_t max_depth) noexcept = 0; + + /** + * Deallocate this parser. + */ + virtual ~dom_parser_implementation() = default; + + /** Number of structural indices passed from stage 1 to stage 2 */ + uint32_t n_structural_indexes{0}; + /** Structural indices passed from stage 1 to stage 2 */ + std::unique_ptr structural_indexes{}; + /** Next structural index to parse */ + uint32_t next_structural_index{0}; + + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept; + + +protected: + /** + * The maximum document length this parser supports. + * + * Buffers are large enough to handle any document up to this length. + */ + size_t _capacity{0}; + + /** + * The maximum depth (number of nested objects and arrays) supported by this parser. + * + * Defaults to DEFAULT_MAX_DEPTH. + */ + size_t _max_depth{0}; + + // Declaring these so that subclasses can use them to implement their constructors. 
+ simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/internal/dom_parser_implementation.h */ + +#include + +namespace simdjson { + +/** + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param p the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual const std::string &name() const { return _name; } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". 
+ */ + virtual const std::string &description() const { return _description; } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime). + */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; + +private: + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. 
+ */
+class available_implementation_list {
+public:
+  /** Get the list of available implementations compiled into simdjson */
+  simdjson_inline available_implementation_list() {}
+  /** Number of implementations */
+  size_t size() const noexcept;
+  /** STL const begin() iterator */
+  const implementation * const *begin() const noexcept;
+  /** STL const end() iterator */
+  const implementation * const *end() const noexcept;
+
+  /**
+   * Get the implementation with the given name.
+   *
+   * Case sensitive.
+   *
+   *     const implementation *impl = simdjson::get_available_implementations()["westmere"];
+   *     if (!impl) { exit(1); }
+   *     if (!impl->supported_by_runtime_system()) { exit(1); }
+   *     simdjson::get_active_implementation() = impl;
+   *
+   * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
+   * @return the implementation, or nullptr if no implementation with that name exists.
+   */
+  const implementation * operator[](const std::string_view &name) const noexcept {
+    for (const implementation * impl : *this) {
+      if (impl->name() == name) { return impl; }
+    }
+    return nullptr;
+  }
+
+  /**
+   * Detect the most advanced implementation supported by the current host.
+   *
+   * This is used to initialize the implementation on startup.
+   *
+   *     const implementation *impl = simdjson::available_implementation::detect_best_supported();
+   *     simdjson::get_active_implementation() = impl;
+   *
+   * @return the most advanced supported implementation for the current host, or an
+   *         implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported
+   *         implementation. Will never return nullptr.
+   */
+  const implementation *detect_best_supported() const noexcept;
+};
+
+} // namespace internal
+
+/**
+ * The list of available implementations compiled into simdjson.
+ */
+extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
+
+/**
+ * The active implementation.
+ *
+ * Automatically initialized on first use to the most advanced implementation supported by this hardware.
+ */
+extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr<const implementation *>& get_active_implementation();
+
+} // namespace simdjson
+
+#endif // SIMDJSON_IMPLEMENTATION_H
+/* end file simdjson/implementation.h */
+/* skipped duplicate #include "simdjson/implementation_detection.h" */
+/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */
+/* begin file simdjson/internal/instruction_set.h */
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2.
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + +namespace simdjson { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000 +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H +/* end file simdjson/internal/instruction_set.h */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* skipped duplicate #include "simdjson/internal/jsoncharutils_tables.h" */ +/* skipped duplicate #include "simdjson/internal/numberparsing_tables.h" */ +/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ +/* begin file simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H +/* end file simdjson/generic/dependencies.h */ +/* including generic/dependencies.h: #include */ +/* begin file generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! 
+#endif + +#ifndef SIMDJSON_SRC_GENERIC_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_DEPENDENCIES_H + +/* skipped duplicate #include */ + +#endif // SIMDJSON_SRC_GENERIC_DEPENDENCIES_H +/* end file generic/dependencies.h */ +/* including generic/stage1/dependencies.h: #include */ +/* begin file generic/stage1/dependencies.h */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_DEPENDENCIES_H +/* end file generic/stage1/dependencies.h */ +/* including generic/stage2/dependencies.h: #include */ +/* begin file generic/stage2/dependencies.h */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H +#define SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H + +/* including simdjson/dom/document.h: #include */ +/* begin file simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +/* including simdjson/dom/base.h: #include "simdjson/dom/base.h" */ +/* begin file simdjson/dom/base.h */ +#ifndef SIMDJSON_DOM_BASE_H +#define SIMDJSON_DOM_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { + +/** + * @brief A DOM API on top of the simdjson parser. + */ +namespace dom { + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +class array; +class document; +class document_stream; +class element; +class key_value_pair; +class object; +class parser; + +#ifdef SIMDJSON_THREADS_ENABLED +struct stage1_worker; +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +namespace internal { + +template +class string_builder; +class tape_ref; + +} // namespace internal + +} // namespace simdjson + +#endif // SIMDJSON_DOM_BASE_H +/* end file simdjson/dom/base.h */ + +#include + +namespace simdjson { +namespace dom { + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document as a JSON array. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. 
+   *
+   * @param os the stream to output to.
+   * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON document).
+   */
+  bool dump_raw_tape(std::ostream &os) const noexcept;
+
+  /** @private Structural values. */
+  std::unique_ptr<uint64_t[]> tape{};
+
+  /** @private String values.
+   *
+   * Should be at least byte_capacity.
+   */
+  std::unique_ptr<uint8_t[]> string_buf{};
+  /** @private Allocate memory to support
+   * input JSON documents of up to len bytes.
+   *
+   * When calling this function, you lose
+   * all the data.
+   *
+   * The memory allocation is strict: you
+   * can use this function to increase
+   * or lower the amount of allocated memory.
+   * Passing zero clears the memory.
+   */
+  error_code allocate(size_t len) noexcept;
+  /** @private Capacity in bytes, in terms
+   * of how many bytes of input JSON we can
+   * support.
+   */
+  size_t capacity() const noexcept;
+
+
+private:
+  size_t allocated_capacity{0};
+  friend class parser;
+}; // class document
+
+} // namespace dom
+} // namespace simdjson
+
+#endif // SIMDJSON_DOM_DOCUMENT_H
+/* end file simdjson/dom/document.h */
+/* including simdjson/internal/tape_type.h: #include */
+/* begin file simdjson/internal/tape_type.h */
+#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H
+#define SIMDJSON_INTERNAL_TAPE_TYPE_H
+
+namespace simdjson {
+namespace internal {
+
+/**
+ * The possible types in the tape.
+ */
+enum class tape_type {
+  ROOT = 'r',
+  START_ARRAY = '[',
+  START_OBJECT = '{',
+  END_ARRAY = ']',
+  END_OBJECT = '}',
+  STRING = '"',
+  INT64 = 'l',
+  UINT64 = 'u',
+  DOUBLE = 'd',
+  TRUE_VALUE = 't',
+  FALSE_VALUE = 'f',
+  NULL_VALUE = 'n'
+}; // enum class tape_type
+
+} // namespace internal
+} // namespace simdjson
+
+#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H
+/* end file simdjson/internal/tape_type.h */
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE2_DEPENDENCIES_H
+/* end file generic/stage2/dependencies.h */
+
+/* including implementation.cpp: #include */
+/* begin file implementation.cpp */
+#ifndef SIMDJSON_SRC_IMPLEMENTATION_CPP
+#define SIMDJSON_SRC_IMPLEMENTATION_CPP
+
+/* skipped duplicate #include */
+/* skipped duplicate #include */
+/* skipped duplicate #include */
+/* including internal/isadetection.h: #include */
+/* begin file internal/isadetection.h */
+/* From
+https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
+Highly modified.
+
+Copyright (c) 2016- Facebook, Inc (Adam Paszke)
+Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
+Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
+(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
+Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3.
Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef SIMDJSON_INTERNAL_ISADETECTION_H +#define SIMDJSON_INTERNAL_ISADETECTION_H + +/* skipped duplicate #include "simdjson/internal/instruction_set.h" */ + +#include +#include +#if defined(_MSC_VER) +#include +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) +#include +#endif + +namespace simdjson { +namespace internal { + +#if defined(__PPC64__) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::ALTIVEC; +} + +#elif defined(__aarch64__) || defined(_M_ARM64) + +static inline uint32_t detect_supported_architectures() { + return instruction_set::NEON; +} + +#elif defined(__x86_64__) || defined(_M_AMD64) // x64 + + +namespace { +// Can be found on Intel ISA Reference for CPUID +constexpr uint32_t cpuid_avx2_bit = 1 << 5; ///< @private Bit 5 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi1_bit = 1 << 3; ///< @private bit 3 of EBX for EAX=0x7 +constexpr uint32_t cpuid_bmi2_bit = 1 << 8; ///< @private bit 8 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512f_bit = 1 << 16; ///< @private bit 16 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512dq_bit = 1 << 17; ///< @private bit 17 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512ifma_bit = 1 << 21; ///< @private bit 21 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512pf_bit = 1 << 26; ///< @private bit 26 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512er_bit = 1 << 27; ///< @private bit 27 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512cd_bit = 1 << 28; ///< @private bit 28 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512bw_bit = 1 << 30; ///< @private bit 30 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vl_bit = 1U << 31; ///< @private bit 31 of EBX for EAX=0x7 +constexpr uint32_t cpuid_avx512vbmi2_bit = 1 << 6; ///< @private bit 6 of ECX for EAX=0x7 +constexpr uint64_t cpuid_avx256_saved = uint64_t(1) << 2; ///< @private bit 2 = AVX +constexpr uint64_t cpuid_avx512_saved = uint64_t(7) << 5; ///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM +constexpr uint32_t cpuid_sse42_bit = 1 << 20; ///< @private bit 20 of ECX for EAX=0x1 +constexpr uint32_t cpuid_osxsave = (uint32_t(1) << 26) | (uint32_t(1) << 27); ///< @private bits 26+27 of ECX for EAX=0x1 +constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1 +} + + + +static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, + uint32_t *edx) { +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuidex(cpu_info, *eax, *ecx); + *eax = cpu_info[0]; + *ebx = 
cpu_info[1]; + *ecx = cpu_info[2]; + *edx = cpu_info[3]; +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) + uint32_t level = *eax; + __get_cpuid(level, eax, ebx, ecx, edx); +#else + uint32_t a = *eax, b, c = *ecx, d; + asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d)); + *eax = a; + *ebx = b; + *ecx = c; + *edx = d; +#endif +} + + +static inline uint64_t xgetbv() { +#if defined(_MSC_VER) + return _xgetbv(0); +#else + uint32_t xcr0_lo, xcr0_hi; + asm volatile("xgetbv\n\t" : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); + return xcr0_lo | (uint64_t(xcr0_hi) << 32); +#endif +} + +static inline uint32_t detect_supported_architectures() { + uint32_t eax, ebx, ecx, edx; + uint32_t host_isa = 0x0; + + // EBX for EAX=0x1 + eax = 0x1; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + + if (ecx & cpuid_sse42_bit) { + host_isa |= instruction_set::SSE42; + } else { + return host_isa; // everything after is redundant + } + + if (ecx & cpuid_pclmulqdq_bit) { + host_isa |= instruction_set::PCLMULQDQ; + } + + + if ((ecx & cpuid_osxsave) != cpuid_osxsave) { + return host_isa; + } + + // xgetbv for checking if the OS saves registers + uint64_t xcr0 = xgetbv(); + + if ((xcr0 & cpuid_avx256_saved) == 0) { + return host_isa; + } + + // ECX for EAX=0x7 + eax = 0x7; + ecx = 0x0; + cpuid(&eax, &ebx, &ecx, &edx); + if (ebx & cpuid_avx2_bit) { + host_isa |= instruction_set::AVX2; + } + if (ebx & cpuid_bmi1_bit) { + host_isa |= instruction_set::BMI1; + } + + if (ebx & cpuid_bmi2_bit) { + host_isa |= instruction_set::BMI2; + } + + if (!((xcr0 & cpuid_avx512_saved) == cpuid_avx512_saved)) { + return host_isa; + } + + if (ebx & cpuid_avx512f_bit) { + host_isa |= instruction_set::AVX512F; + } + + if (ebx & cpuid_avx512dq_bit) { + host_isa |= instruction_set::AVX512DQ; + } + + if (ebx & cpuid_avx512ifma_bit) { + host_isa |= instruction_set::AVX512IFMA; + } + + if (ebx & cpuid_avx512pf_bit) { + host_isa |= instruction_set::AVX512PF; + } + + if (ebx & cpuid_avx512er_bit) { + host_isa |= instruction_set::AVX512ER; + } + + if (ebx & cpuid_avx512cd_bit) { + host_isa |= instruction_set::AVX512CD; + } + + if (ebx & cpuid_avx512bw_bit) { + host_isa |= instruction_set::AVX512BW; + } + + if (ebx & cpuid_avx512vl_bit) { + host_isa |= instruction_set::AVX512VL; + } + + if (ecx & cpuid_avx512vbmi2_bit) { + host_isa |= instruction_set::AVX512VBMI2; + } + + return host_isa; +} +#else // fallback + + +static inline uint32_t detect_supported_architectures() { + return instruction_set::DEFAULT; +} + + +#endif // end SIMD extension detection code + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ISADETECTION_H +/* end file internal/isadetection.h */ + +#include + +namespace simdjson { + +bool implementation::supported_by_runtime_system() const { + uint32_t required_instruction_sets = this->required_instruction_sets(); + uint32_t supported_instruction_sets = internal::detect_supported_architectures(); + return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets); +} + +} // namespace simdjson + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_IMPLEMENTATION_ARM64 +/* including simdjson/arm64/implementation.h: #include */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ +namespace simdjson { +namespace internal { +static const arm64::implementation* get_arm64_singleton() { + static const arm64::implementation arm64_singleton{}; + return &arm64_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_ARM64 + +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* including simdjson/fallback/implementation.h: #include */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ +namespace simdjson { +namespace internal { +static const fallback::implementation* get_fallback_singleton() { + static const fallback::implementation fallback_singleton{}; + return &fallback_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_FALLBACK + + +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* including simdjson/haswell/implementation.h: #include */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ 
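/*
 * [Editor's note: illustrative sketch only; not part of the vendored simdjson sources.]
 * Each get_<kernel>_singleton() accessor above follows the same pattern: a
 * function-local static ("Meyers singleton") constructed on first call, which
 * avoids a global static initializer and any initialization-order problems.
 * The pattern in isolation, with a hypothetical type name:
 *
 *   const some_implementation* get_some_singleton() {
 *     static const some_implementation singleton{}; // constructed once, on first use
 *     return &singleton;
 *   }
 */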
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ +namespace simdjson { +namespace internal { +static const haswell::implementation* get_haswell_singleton() { + static const haswell::implementation haswell_singleton{}; + return &haswell_singleton; +} +} // namespace internal +} // namespace simdjson +#endif + +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* including simdjson/icelake/implementation.h: #include */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ +namespace simdjson { +namespace internal { +static const icelake::implementation* get_icelake_singleton() { + static const icelake::implementation icelake_singleton{}; + return &icelake_singleton; +} +} // namespace internal +} // namespace simdjson +#endif + +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* 
including simdjson/ppc64/implementation.h: #include */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ +namespace simdjson { +namespace internal { +static const ppc64::implementation* get_ppc64_singleton() { + static const ppc64::implementation ppc64_singleton{}; + return &ppc64_singleton; +} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_PPC64 + +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* including simdjson/westmere/implementation.h: #include */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ +namespace simdjson { +namespace internal { +static const simdjson::westmere::implementation* get_westmere_singleton() { + static const simdjson::westmere::implementation westmere_singleton{}; + return &westmere_singleton; 
+} +} // namespace internal +} // namespace simdjson +#endif // SIMDJSON_IMPLEMENTATION_WESTMERE + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { +namespace internal { + +// Static array of known implementations. We're hoping these get baked into the executable +// without requiring a static initializer. + +/** + * @private Detects best supported implementation on first use, and sets it + */ +class detect_best_supported_implementation_on_first_use final : public implementation { +public: + const std::string &name() const noexcept final { return set_best()->name(); } + const std::string &description() const noexcept final { return set_best()->description(); } + uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); } + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final { + return set_best()->create_dom_parser_implementation(capacity, max_length, dst); + } + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final { + return set_best()->minify(buf, len, dst, dst_len); + } + simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override { + return set_best()->validate_utf8(buf, len); + } + simdjson_inline detect_best_supported_implementation_on_first_use() noexcept : implementation("best_supported_detector", "Detects the best supported implementation and sets it", 0) {} +private: + const implementation *set_best() const noexcept; +}; + +static const std::initializer_list& get_available_implementation_pointers() { + static const std::initializer_list available_implementation_pointers { +#if SIMDJSON_IMPLEMENTATION_ICELAKE + get_icelake_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL + get_haswell_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE + get_westmere_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_ARM64 + get_arm64_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 + get_ppc64_singleton(), +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK + get_fallback_singleton(), +#endif + }; // available_implementation_pointers + return available_implementation_pointers; +} + +// So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support +class unsupported_implementation final : public implementation { +public: + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t, + size_t, + std::unique_ptr& + ) const noexcept final { + return UNSUPPORTED_ARCHITECTURE; + } + simdjson_warn_unused error_code minify(const uint8_t *, size_t, uint8_t *, size_t &) const noexcept final override { + return UNSUPPORTED_ARCHITECTURE; + } + simdjson_warn_unused bool validate_utf8(const char *, size_t) const noexcept final override { + return false; // Just refuse to validate. Given that we have a fallback implementation + // it seems unlikely that unsupported_implementation will ever be used. If it is used, + // then it will flag all strings as invalid. The alternative is to return an error_code + // from which the user has to figure out whether the string is valid UTF-8... which seems + // like a lot of work just to handle the very unlikely case that we have an unsupported + // implementation. And, when it does happen (that we have an unsupported implementation), + // what are the chances that the programmer has a fallback? 
Given that *we* provide the + // fallback, it implies that the programmer would need a fallback for our fallback. + } + unsupported_implementation() : implementation("unsupported", "Unsupported CPU (no detected SIMD instructions)", 0) {} +}; + +const unsupported_implementation* get_unsupported_singleton() { + static const unsupported_implementation unsupported_singleton{}; + return &unsupported_singleton; +} + +size_t available_implementation_list::size() const noexcept { + return internal::get_available_implementation_pointers().size(); +} +const implementation * const *available_implementation_list::begin() const noexcept { + return internal::get_available_implementation_pointers().begin(); +} +const implementation * const *available_implementation_list::end() const noexcept { + return internal::get_available_implementation_pointers().end(); +} +const implementation *available_implementation_list::detect_best_supported() const noexcept { + // They are prelisted in priority order, so we just go down the list + uint32_t supported_instruction_sets = internal::detect_supported_architectures(); + for (const implementation *impl : internal::get_available_implementation_pointers()) { + uint32_t required_instruction_sets = impl->required_instruction_sets(); + if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; } + } + return get_unsupported_singleton(); // this should never happen? +} + +const implementation *detect_best_supported_implementation_on_first_use::set_best() const noexcept { + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *force_implementation_name = getenv("SIMDJSON_FORCE_IMPLEMENTATION"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (force_implementation_name) { + auto force_implementation = get_available_implementations()[force_implementation_name]; + if (force_implementation) { + return get_active_implementation() = force_implementation; + } else { + // Note: abort() and stderr usage within the library is forbidden. 
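      // [Editor's note: illustrative only; not part of the vendored simdjson sources.]
      // If SIMDJSON_FORCE_IMPLEMENTATION names a kernel that was not compiled in or
      // is misspelled, the line below silently selects the "unsupported"
      // implementation rather than aborting. A caller could detect that situation
      // with a hypothetical check such as:
      //
      //   if (simdjson::get_active_implementation()->name() == "unsupported") {
      //     // handle the misconfiguration in application code
      //   }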
+ return get_active_implementation() = get_unsupported_singleton(); + } + } + return get_active_implementation() = get_available_implementations().detect_best_supported(); +} + +} // namespace internal + +SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() { + static const internal::available_implementation_list available_implementations{}; + return available_implementations; +} + +SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation() { + static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton; + static internal::atomic_ptr active_implementation{&detect_best_supported_implementation_on_first_use_singleton}; + return active_implementation; +} + +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept { + return get_active_implementation()->minify(reinterpret_cast(buf), len, reinterpret_cast(dst), dst_len); +} +simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept { + return get_active_implementation()->validate_utf8(buf, len); +} +const implementation * builtin_implementation() { + static const implementation * builtin_impl = get_available_implementations()[SIMDJSON_STRINGIFY(SIMDJSON_BUILTIN_IMPLEMENTATION)]; + assert(builtin_impl); + return builtin_impl; +} + +} // namespace simdjson + +#endif // SIMDJSON_SRC_IMPLEMENTATION_CPP +/* end file implementation.cpp */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_IMPLEMENTATION_ARM64 +/* including arm64.cpp: #include */ +/* begin file arm64.cpp */ +#ifndef SIMDJSON_SRC_ARM64_CPP +#define SIMDJSON_SRC_ARM64_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/arm64.h: #include */ +/* begin file simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). 
+ */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). 
Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) 
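  // [Editor's note: worked example added for clarity; not part of the vendored sources.]
  // The shift-xor cascade below computes a prefix XOR: bit i of the result is the
  // XOR of bits 0 through i of the input. Traced on a single byte for brevity
  // (the 64-bit version just continues the doubling shifts up to << 32):
  //
  //   x            = 00100100
  //   x ^= x << 1  -> 01101100
  //   x ^= x << 2  -> 11011100
  //   x ^= x << 4  -> 00011100   (== prefix_xor(00100100), matching the example above)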
+ /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if _M_ARM64 +// __umulh requires intrin.h +#include +#endif // _M_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. 
+ * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. + */ +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so end ups generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so end ups generating worse code. + // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. 
+ // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
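    // [Editor's note: explanatory sketch; not part of the vendored simdjson sources.]
    // How to_bitmask() works: AND-ing with bit_mask leaves, in every "true" lane
    // (0xFF), the power of two matching that lane's position within its 8-byte half.
    // Three vpaddq_u8 pairwise-add passes then sum lanes 0..7 into byte 0 and lanes
    // 8..15 into byte 1, so the low uint16_t holds one bit per lane. For example,
    // with only lanes 0 and 9 true:
    //
    //   after the AND : {0x01,0,0,0,0,0,0,0, 0,0x02,0,0,0,0,0,0}
    //   after 3 passes: byte 0 = 0x01, byte 1 = 0x02
    //   bitmask       = 0x0201   // bits 0 and 9 set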
+ simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return 
vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t 
v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} 
+ simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for arm64 */ +/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for arm64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static 
inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for arm64 */ +/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for arm64 */ +/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begin + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr<open_container[]> open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr<bool[]> is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ +/* including
simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result<T> : public internal::implementation_simdjson_result_base<T> { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base<T>() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base<T>(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base<T>(std::forward<T>(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base<T>(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result<T> will be chainable with your methods. + */ +template<typename T> +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error.
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
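// ---------------------------------------------------------------------------
// [Editor's illustration - not part of upstream simdjson.] The "fast function"
// mentioned above is the integer approximation (((152170 + 65536) * power) >> 16)
// introduced in the comment that follows; 152170/65536 is roughly log(5)/log(2) ~= 2.3219.
// Two hand-checked sample values:
//   power = 10  : floor(log2(5^10))  = 23,  and 23  + 10  = 33
//   power = 308 : floor(log2(5^308)) = 715, and 715 + 308 = 1023
// The block-scope static_asserts below re-check that arithmetic at compile time;
// they have no runtime effect and can be removed without changing behavior.
static_assert((((152170 + 65536) * 10) >> 16) == 33, "fast log2(5^power) approximation, power = 10");
static_assert((((152170 + 65536) * 308) >> 16) == 1023, "fast log2(5^power) approximation, power = 308");
// The original derivation of the 152170 + 65536 constant continues below.
// ---------------------------------------------------------------------------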
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11} or q >= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows).
+ return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template<typename I> +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast<uint8_t>(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an inconclusive gain. + + // If there were no digits, it's an error.
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template<typename W> +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed.
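// ---------------------------------------------------------------------------
// [Editor's illustration - not part of upstream simdjson.] The contract above
// requires the byte after the number to be a structural character or
// whitespace. A minimal sketch of how a caller could prepare a standalone
// number such as "12345" is shown below; the helper name and the use of
// std::vector are assumptions for illustration, not simdjson API. The snippet
// is kept inside #if 0 so it does not affect compilation of this header.
#if 0
#include <cstring>
#include <vector>
inline std::vector<uint8_t> pad_standalone_number(const char *text, size_t len) {
  // Fill with spaces so the byte right after the digits is whitespace, and keep
  // SIMDJSON_PADDING extra bytes so the 8-byte SWAR loads above stay in bounds.
  std::vector<uint8_t> padded(len + 1 + SIMDJSON_PADDING, uint8_t(' '));
  std::memcpy(padded.data(), text, len);
  return padded;
}
// parse_number(padded.data(), writer); // writer: any type providing append_s64/append_u64/append_double
#endif
// ---------------------------------------------------------------------------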
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
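// ---------------------------------------------------------------------------
// [Editor's illustration - not part of upstream simdjson.] Concretely, the
// table declared below encodes lookups such as:
//   integer_string_finisher[uint8_t(' ')] == SUCCESS        (whitespace may follow an integer)
//   integer_string_finisher[uint8_t(',')] == SUCCESS        (structural character)
//   integer_string_finisher[uint8_t('.')] == INCORRECT_TYPE (a float, not an integer)
//   integer_string_finisher[uint8_t('e')] == INCORRECT_TYPE
//   integer_string_finisher[uint8_t('a')] == NUMBER_ERROR   (not a valid way to end a number)
// so the single lookup integer_string_finisher[*p] used by the parsers further
// down replaces a branchy is_structural_or_whitespace / '.' / 'e' / 'E' check.
// ---------------------------------------------------------------------------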
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
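+  // Illustrative walk-through (assuming src points at "3.14e2"): the integer loop
+  // below leaves i == 3; the decimal part appends its digits so that i == 314 and
+  // exponent == -2; the exponent part then adds 2, giving exponent == 0, and
+  // compute_float_64(0, 314, false, d) yields 314.0, i.e. 3.14e2.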
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
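+  // For illustration: "-1" is classified as signed_integer by the negative check
+  // above; "9223372036854775807" (19 digits, memcmp < 0) stays signed_integer, while
+  // "9223372036854775808" and any 20-digit integer become unsigned_integer. Inputs
+  // such as "1.0" or "1e2" are not followed by a structural/whitespace byte right
+  // after the digits, so they fall through to the return below.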
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
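+    // Why 19: the largest 19-digit value, 9,999,999,999,999,999,999, still fits in
+    // the uint64_t accumulator i, so up to 19 significant digits are represented
+    // exactly; with more digits i may have wrapped, and the code below falls back to
+    // parse_float_fallback on the original string instead of using compute_float_64.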
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for arm64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +/* end file simdjson/generic/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_ARM64_H +/* end file simdjson/arm64.h */ +/* including simdjson/arm64/implementation.h: #include */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} 
// namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ + +/* including simdjson/arm64/begin.h: #include */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
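+  // For example, trailing_zeroes(0b1000) == 3. As noted above, the result for a zero
+  // input may be anything, but callers ignore it in that case.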
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
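+  // Worked out on the 8-bit example above (prefix_xor(00100100) == 00011100), the
+  // shift-and-xor ladder below proceeds as:
+  //   00100100 ^ (00100100 << 1) == 01101100
+  //   01101100 ^ (01101100 << 2) == 11011100
+  //   11011100 ^ (11011100 << 4) == 00011100
+  // The 64-bit version simply keeps doubling the shift up to 32.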
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if _M_ARM64 +// __umulh requires intrin.h +#include +#endif // _M_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. + * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. + */ +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so end ups generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so end ups generating worse code. 
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
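+    // Illustration of the reduction below: each true lane keeps one bit of bit_mask
+    // (0x01..0x80, repeated for the upper eight lanes); three pairwise adds
+    // (vpaddq_u8) then fold the 16 bytes into two, which are read back as a 16-bit
+    // mask. For instance, if only lanes 0 and 9 are true, to_bitmask() == 0x0201.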
+ simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return 
vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t 
v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} 
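+    // Typical use (sketch, not part of this header): load a 64-byte block and derive
+    // a per-byte bitmask, e.g.
+    //   simd8x64<uint8_t> in(buf);         // buf must provide 64 readable bytes
+    //   uint64_t quote_bits = in.eq('"');  // bit i set when buf[i] == '"'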
+ simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
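+// For illustration: if the block starts with the bytes  a b " c \  then copy_and_find()
+// below reports quote_bits with bit 2 set and bs_bits with bit 4 set. has_quote_first()
+// computes ((bs_bits - 1) & quote_bits): subtracting 1 turns on exactly the bits below
+// the first backslash (all bits when bs_bits == 0), so the result is non-zero precisely
+// when a quote appears before the first backslash.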
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including generic/amalgamated.h for arm64: #include */ +/* begin file generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for arm64: #include */ +/* begin file generic/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for arm64 */ +/* including generic/dom_parser_implementation.h for arm64: #include */ +/* begin file generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace arm64 { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for arm64 */ +/* including generic/json_character_block.h for arm64: #include */ +/* begin file generic/json_character_block.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for arm64 */ +/* end file generic/amalgamated.h for arm64 */ +/* including generic/stage1/amalgamated.h for arm64: #include */ +/* begin file generic/stage1/amalgamated.h for arm64 */ +// Stuff other things depend on +/* including generic/stage1/base.h for arm64: #include */ +/* begin file generic/stage1/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { 
+namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for arm64 */ +/* including generic/stage1/buf_block_reader.h for arm64: #include */ +/* begin file generic/stage1/buf_block_reader.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for arm64 */ +/* including generic/stage1/json_escape_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_escape_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. 
block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for arm64 */ +/* including generic/stage1/json_string_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_string_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on 
non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. 
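  /* Illustrative sketch (editor's aside, not part of simdjson): a scalar reference for one 64-byte
     block of the string scan described above. It builds the backslash and quote bitmasks byte by
     byte, applies the odd/even backslash-run trick from json_escape_scanner to find escaped
     characters, and then takes a prefix XOR over the surviving quotes to mark in-string bytes.
     scan_block_reference and its local names are assumptions of this sketch only; the block is
     fenced off from the build with #if 0. */
#if 0
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Portable prefix XOR: bit i of the result is the XOR of bits 0..i of the input.
static uint64_t prefix_xor_scalar(uint64_t bitmask) {
  bitmask ^= bitmask << 1;
  bitmask ^= bitmask << 2;
  bitmask ^= bitmask << 4;
  bitmask ^= bitmask << 8;
  bitmask ^= bitmask << 16;
  bitmask ^= bitmask << 32;
  return bitmask;
}

static void scan_block_reference(const char *block, size_t n) {
  uint64_t backslash = 0, raw_quote = 0;
  for (size_t i = 0; i < n && i < 64; i++) {
    if (block[i] == '\\') { backslash |= uint64_t(1) << i; }
    if (block[i] == '"')  { raw_quote |= uint64_t(1) << i; }
  }
  // Odd/even backslash-run trick (prev_is_escaped assumed 0 for this single block).
  const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL;
  uint64_t maybe_escaped = backslash << 1;                        // characters right after a backslash
  uint64_t escape_and_terminal = (maybe_escaped | ODD_BITS) - backslash;
  escape_and_terminal ^= ODD_BITS;
  uint64_t escaped = escape_and_terminal ^ backslash;             // bytes that are themselves escaped
  uint64_t quote = raw_quote & ~escaped;                          // real (unescaped) quotes
  uint64_t in_string = prefix_xor_scalar(quote);                  // inside a string, incl. the open quote
  std::printf("escaped  =%016llx\nquote    =%016llx\nin_string=%016llx\n",
              (unsigned long long)escaped, (unsigned long long)quote,
              (unsigned long long)in_string);
}

int main() {
  const char json[] = "{\"a\\\"b\": 1}";   // the quote after the backslash is escaped
  scan_block_reference(json, std::strlen(json));
  return 0;
}
#endif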
+ return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for arm64 */ +/* including generic/stage1/utf8_lookup4_algorithm.h for arm64: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
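    /* Illustrative worked example (editor's aside, derived from the lookup tables above and below):
       for the overlong prefix 0xE0 0x80, prev1 = 0xE0 and input = 0x80.
         byte_1_high (high nibble of 0xE0, i.e. 0xE) -> TOO_SHORT | OVERLONG_3 | SURROGATE
         byte_1_low  (low nibble of 0xE0, i.e. 0x0)  -> CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4
         byte_2_high (high nibble of 0x80, i.e. 0x8) -> TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4
       The bitwise AND of the three keeps only OVERLONG_3, flagging the overlong 3-byte encoding,
       whereas a legal pair such as 0xE2 0x82 leaves no common bit set. */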
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for arm64 */ +/* including generic/stage1/json_scanner.h for arm64: #include */ +/* begin file generic/stage1/json_scanner.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). 
+ * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for arm64 */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for arm64: #include */ +/* begin file generic/stage1/find_next_document_index.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
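  /* Worked example (editor's aside): for the batch  {"x":1} {"y":2  the structural characters are
     {  "  :  1  }  {  "  :  2  (indexes 0..8). Scanning backwards, the first boundary pair found is
     ( } , { ) at indexes 4 and 5; at that point obj_cnt == 1 because the trailing '{' has been
     counted, so the last document is incomplete and the function returns 5, truncating the batch
     to the complete document {"x":1}. */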
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for arm64 */ +/* including generic/stage1/json_minifier.h for arm64: #include */ +/* begin file generic/stage1/json_minifier.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly 
to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for arm64 */ +/* including generic/stage1/json_structural_indexer.h for arm64: #include */ +/* begin file generic/stage1/json_structural_indexer.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. 
+ */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. 
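  /* Worked example (editor's aside): suppose a streaming window happens to end with the two bytes
     0xE2 0x82, the first two bytes of the three-byte character U+20AC. trim_partial_utf8 sees that
     buf[len-2] >= 0xe0 and returns len-2, so the truncated character is left for the next window
     rather than being flagged as invalid UTF-8. Only when trimming empties the window entirely do
     we report UTF8_ERROR below. */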
+ if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. 
If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. 
+ return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for arm64 */ +/* including generic/stage1/utf8_validator.h for arm64: #include */ +/* begin file generic/stage1/utf8_validator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for arm64 */ +/* end file generic/stage1/amalgamated.h for arm64 */ +/* including generic/stage2/amalgamated.h for arm64: #include */ +/* begin file generic/stage2/amalgamated.h for arm64 */ +// Stuff other things depend on +/* including generic/stage2/base.h for arm64: #include */ +/* begin file generic/stage2/base.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for arm64 */ +/* including generic/stage2/tape_writer.h for arm64: #include */ +/* begin file generic/stage2/tape_writer.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for arm64 */ +/* including generic/stage2/logger.h for arm64: #include */ +/* begin file generic/stage2/logger.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace arm64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
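+
+  // Editorial note (not part of upstream simdjson): the tape_writer earlier in this
+  // file packs an 8-bit tape_type into the top byte of a 64-bit word and keeps 56
+  // bits of payload (such as an offset into the tape). The block below is a
+  // standalone sketch of that packing and the matching unpacking, guarded out with
+  // #if 0 so it has no effect on the build; the helper names (pack_tape_entry,
+  // unpack_type, unpack_payload) are illustrative only and do not exist in simdjson.
+#if 0
+#include <cassert>
+#include <cstdint>
+
+// Pack a 56-bit payload and an 8-bit type tag into one tape word,
+// mirroring tape_writer::append / tape_writer::write above.
+static uint64_t pack_tape_entry(uint64_t payload, uint8_t type) {
+  return payload | (uint64_t(type) << 56);
+}
+static uint8_t  unpack_type(uint64_t word)    { return uint8_t(word >> 56); }
+static uint64_t unpack_payload(uint64_t word) { return word & ((uint64_t(1) << 56) - 1); }
+
+static void tape_entry_example() {
+  // e.g. a START_OBJECT-style entry whose payload points at tape index 42:
+  const uint64_t word = pack_tape_entry(42, '{');
+  assert(unpack_type(word) == '{');
+  assert(unpack_payload(word) == 42);
+}
+#endif
+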
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. 
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for arm64 */ +/* including generic/stage2/stringparsing.h for arm64: #include */ +/* begin file generic/stage2/stringparsing.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
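+
+  // Editorial note (not part of upstream simdjson): a standalone worked example of
+  // the surrogate-pair arithmetic applied by the branch below, guarded out with
+  // #if 0 so it has no effect on the build. The helper name combine_surrogates is
+  // illustrative only; the constants follow the usual UTF-16 rules.
+#if 0
+#include <cassert>
+#include <cstdint>
+
+// A high surrogate H in [0xD800, 0xDBFF] and a low surrogate L in [0xDC00, 0xDFFF]
+// encode the code point ((H - 0xD800) << 10 | (L - 0xDC00)) + 0x10000.
+static uint32_t combine_surrogates(uint32_t high, uint32_t low) {
+  return (((high - 0xd800) << 10) | (low - 0xdc00)) + 0x10000;
+}
+static void surrogate_example() {
+  // "\uD83D\uDE00" in a JSON string decodes to U+1F600 (a grinning-face emoji).
+  assert(combine_surrogates(0xd83d, 0xde00) == 0x1f600);
+}
+#endif
+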
+ if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. 
+ */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for arm64 */ +/* including generic/stage2/structural_iterator.h for arm64: #include */ +/* begin file generic/stage2/structural_iterator.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for arm64 */ +/* including generic/stage2/tape_builder.h for arm64: #include */ +/* begin file generic/stage2/tape_builder.h for arm64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace { +namespace 
stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. 
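+
+  // Editorial note (not part of upstream simdjson): parse_string above copies the
+  // raw bytes while rewriting escapes; two-character escapes are resolved through
+  // the escape_map table defined earlier in this file (e.g. '\' followed by 'n'
+  // becomes 0x0A). The #if 0 block below is a standalone sketch of that 1:1
+  // mapping and is not used by the library; simple_escape is a hand-written
+  // equivalent of the table, named here for illustration only.
+#if 0
+#include <cassert>
+#include <cstdint>
+
+static uint8_t simple_escape(uint8_t c) {
+  // Map the byte after a backslash to the decoded byte, or 0 when the escape is
+  // not one of " \ / b f n r t (the \u escape is handled separately).
+  switch (c) {
+    case '"':  return 0x22;
+    case '\\': return 0x5c;
+    case '/':  return 0x2f;
+    case 'b':  return 0x08;
+    case 'f':  return 0x0c;
+    case 'n':  return 0x0a;
+    case 'r':  return 0x0d;
+    case 't':  return 0x09;
+    default:   return 0;      // invalid escape
+  }
+}
+static void escape_example() {
+  assert(simple_escape('n') == '\n');
+  assert(simple_escape('x') == 0);   // "\x" is not a valid JSON escape
+}
+#endif
+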
+ if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t 
*value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for arm64 */ +/* end file generic/stage2/amalgamated.h for arm64 */ + +// +// Stage 1 +// +namespace simdjson { +namespace arm64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // Functional programming causes trouble with Visual Studio. + // Keeping this version in comments since it is much nicer: + // auto v = in.map([&](simd8 chunk) { + // auto nib_lo = chunk & 0xf; + // auto nib_hi = chunk.shr<4>(); + // auto shuf_lo = nib_lo.lookup_16(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + // auto shuf_hi = nib_hi.lookup_16(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + // return shuf_lo & shuf_hi; + // }); + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). *However* if we only need spaces, + // it is likely that we will still compute 'v' above with two lookup_16: one + // could do it a bit cheaper. This is in contrast with the x64 implementations + // where we can, efficiently, do the white space and structural matching + // separately. One reason for this difference is that on ARM NEON, the table + // lookups either zero or leave unchanged the characters exceeding 0xF whereas + // on x64, the equivalent instruction (pshufb) automatically applies a mask, + // ignoring the 4 most significant bits. Thus the x64 implementation is + // optimized differently. This being said, if you use this code strictly + // just for minification (or just to identify the structural characters), + // there is a small untaken optimization opportunity here. We deliberately + // do not pick it up. 
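+
+  // Editorial note (not part of upstream simdjson): the #if 0 block below is a
+  // standalone scalar sketch of the nibble-lookup classification performed above,
+  // using the same two 16-entry tables; bits 0x7 mark structural characters and
+  // bits 0x18 mark whitespace. The function name classify_byte is illustrative
+  // only and does not exist in the library.
+#if 0
+#include <cassert>
+#include <cstdint>
+
+static uint8_t classify_byte(uint8_t c) {
+  static const uint8_t lo_table[16] = {16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
+  static const uint8_t hi_table[16] = {8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
+  // Same trick as the SIMD code: look up the low and high nibbles and AND the results.
+  return lo_table[c & 0xf] & hi_table[c >> 4];
+}
+static void classify_example() {
+  assert((classify_byte(',')  & 0x7)  != 0);  // comma is a structural character
+  assert((classify_byte('{')  & 0x7)  != 0);  // so are braces, brackets and colon
+  assert((classify_byte(' ')  & 0x18) != 0);  // space is whitespace
+  assert((classify_byte('\t') & 0x18) != 0);  // so are tab, CR and LF
+  assert(classify_byte('a') == 0);            // ordinary letters are neither
+}
+#endif
+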
+ + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + simd8 bits = input.reduce_or(); + return bits.max_val() < 0x80u; +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1 >= uint8_t(0xc0u); + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); + // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well. + // This will work fine because we only have to report errors for cases with 0-1 lead bytes. + // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is + // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character. + // The error will be detected there. + return is_second_byte ^ is_third_byte ^ is_fourth_byte; +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2 >= uint8_t(0xe0u); + simd8 is_fourth_byte = prev3 >= uint8_t(0xf0u); + return is_third_byte ^ is_fourth_byte; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace arm64 { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return arm64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return arm64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return arm64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept { + return arm64::stringparsing::parse_string(src, dst, allow_replacement); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return arm64::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace arm64 +} // namespace simdjson + +/* including simdjson/arm64/end.h: #include */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_SRC_ARM64_CPP +/* end file arm64.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_FALLBACK +/* including fallback.cpp: #include */ +/* begin file fallback.cpp */ +#ifndef SIMDJSON_SRC_FALLBACK_CPP +#define SIMDJSON_SRC_FALLBACK_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/fallback.h: #include */ +/* begin file simdjson/fallback.h */ +#ifndef SIMDJSON_FALLBACK_H +#define SIMDJSON_FALLBACK_H + +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
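+
+  // Editorial note (not part of upstream simdjson): _BitScanReverse64 reports the
+  // index of the highest set bit, so the leading-zero count of a non-zero value is
+  // 63 minus that index, which is what the branch below computes. The #if 0 block
+  // is a standalone sketch of the same count using a portable loop; the name
+  // count_leading_zeroes is illustrative only.
+#if 0
+#include <cassert>
+#include <cstdint>
+
+static int count_leading_zeroes(uint64_t x) {
+  if (x == 0) { return 64; }
+  int n = 0;
+  while ((x & (uint64_t(1) << 63)) == 0) { x <<= 1; n++; }
+  return n;
+}
+static void clz_example() {
+  // 0xF0 has its highest set bit at index 7, so 63 - 7 = 56 leading zeroes.
+  assert(count_leading_zeroes(0xF0) == 56);
+  assert(count_leading_zeroes(uint64_t(1) << 63) == 0);
+  assert(count_leading_zeroes(0) == 64);
+}
+#endif
+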
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t 
__emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for fallback */ +/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for fallback */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // 
strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for fallback */ +/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for fallback */ +/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// expectation: sizeof(open_container) = 64/8. 
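+// That is, the two 32-bit members below (tape_index and count) are expected to
+// pack into a single 8-byte struct; the static_assert following the definition
+// enforces this.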
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +namespace fallback { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
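+  // For example, "1e309" converts to +infinity, falls outside the
+  // [lowest(), max()] range tested below, and is therefore rejected.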
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
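+  // For example, "1e+" or "1e-" reaches this point with p == start_exp,
+  // because no digit followed the optional sign.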
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
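+// Illustrative trace: for the input "3.14", the integer scan below leaves i = 3,
+// parse_decimal_after_separator then accumulates i = 314 with exponent = -2,
+// and write_float takes the compute_float_64 fast path, producing the correctly
+// rounded double 314.0 / 10^2.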
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
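+// For example, integer_string_finisher[','] and integer_string_finisher['}'] are
+// SUCCESS, integer_string_finisher['.'] and integer_string_finisher['e'] are
+// INCORRECT_TYPE, and integer_string_finisher['x'] is NUMBER_ERROR.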
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
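+  // For example, a lone "0" is accepted, while "0123" (leading zero) or an
+  // empty digit sequence is rejected by the checks that follow.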
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
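+  // (Looking ahead to the end of this function: the magnitude is accumulated into the unsigned
+  //  variable i, the range check allows one extra value on the negative side (INT64_MAX + 1),
+  //  and '~i + 1' returns the two's-complement bit pattern of -i; for example, for the input
+  //  "-9223372036854775808", i is 9223372036854775808 and ~i + 1 yields INT64_MIN.)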
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
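+  // (The fractional digits parsed below are folded into the same 64-bit accumulator i;
+  //  'exponent' tracks how far the decimal point moved plus any explicit exponent, so the
+  //  value being assembled is roughly i * 10^exponent.)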
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
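+  // (Examples, assuming the token is followed by whitespace or a structural character:
+  //  "9223372036854775807" -> signed_integer, "9223372036854775808" -> unsigned_integer,
+  //  "-1" -> signed_integer, while "1.0" or "1e2" reach this point and are reported as
+  //  floating_point_number.)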
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
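+    // (Why 19: the accumulator i is a uint64_t, and only up to 19 decimal digits are guaranteed
+    //  to fit without wrapping, since 10^19 - 1 < 2^64. With more digits the fast path is skipped
+    //  and parse_float_fallback re-parses the number from the original text.)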
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +/* including simdjson/fallback/implementation.h: #include */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H 
+/* end file simdjson/fallback/implementation.h */ + +/* including simdjson/fallback/begin.h: #include */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. 
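+// In this fallback kernel the scan is strictly byte-at-a-time: BYTES_PROCESSED is 1,
+// copy_and_find() copies a single byte to dst and records it, and quote_index() /
+// backslash_index() return 0 when that byte is the quote / backslash and 1 otherwise,
+// which keeps the pointer arithmetic in the generic string parser correct.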
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // 
_umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including generic/stage1/find_next_document_index.h for fallback: #include */ +/* begin file generic/stage1/find_next_document_index.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! 
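+      // (arr_cnt and obj_cnt are both zero exactly when every '[' and '{' encountered while
+      //  scanning backwards has been matched by a ']' or '}', i.e. the value that ends at this
+      //  boundary is balanced.)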
+ if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. + switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for fallback */ +/* including generic/stage2/stringparsing.h for fallback: #include */ +/* begin file generic/stage2/stringparsing.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. 
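+  // 0x8. - 0xF.: no byte with the high bit set starts a valid escape sequence, so the
+  // remaining rows are all zero; parse_string below treats a zero entry as a bogus escape.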
+ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. 
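+  // Unlike handle_unicode_codepoint above, there is no allow_replacement path here: an unpaired
+  // or invalid surrogate is neither rejected nor replaced with U+FFFD; its code point is encoded
+  // as-is, which is what makes the output WTF-8 rather than strict UTF-8.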
+ // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. + */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. 
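+    // (In this fallback implementation n is 1: each pass handles a single byte and, when neither
+    //  a quote nor a backslash is found, advances src and dst by backslash_and_quote::BYTES_PROCESSED.)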
+ auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for fallback */ +/* including generic/stage2/logger.h for fallback: #include */ +/* begin file generic/stage2/logger.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace fallback { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
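+  // (LOG_ENABLED is fixed at compile time: building with SIMDJSON_VERBOSE_LOGGING defined, e.g.
+  //  -DSIMDJSON_VERBOSE_LOGGING=1, turns it on, and log_start()/log_line() below then print a
+  //  table of stage 2 events with buffer context and structural indexes to stdout.)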
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. 
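+ *
+ * (The stage 2 tape_builder defined further below implements this visitor interface; its
+ * visit_* member declarations mirror the callbacks listed here.)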
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for fallback */ +/* including generic/stage2/tape_writer.h for fallback: #include */ +/* begin file generic/stage2/tape_writer.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for fallback */ +/* including generic/stage2/tape_builder.h for fallback: #include */ +/* begin file generic/stage2/tape_builder.h for fallback */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. + */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. 
*/ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused 
simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. + if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. 
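+  // As a concrete illustration (restating the point above): for the stand-alone
+  // document consisting of the single digit 9 followed by a NUL terminator, the
+  // copy made below holds the digit 9 followed by SIMDJSON_PADDING space
+  // characters, so the number parser sees a clean delimiter rather than the NUL.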
+ // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... 
so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for fallback */ + +// +// Stage 1 +// + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) fallback::dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { +namespace stage1 { + +class structural_scanner { +public: + +simdjson_inline structural_scanner(dom_parser_implementation &_parser, stage1_mode _partial) + : buf{_parser.buf}, + next_structural_index{_parser.structural_indexes.get()}, + parser{_parser}, + len{static_cast(_parser.len)}, + partial{_partial} { +} + +simdjson_inline void add_structural() { + *next_structural_index = idx; + next_structural_index++; +} + +simdjson_inline bool is_continuation(uint8_t c) { + return (c & 0xc0) == 0x80; +} + +simdjson_inline void validate_utf8_character() { + // Continuation + if (simdjson_unlikely((buf[idx] & 0x40) == 0)) { + // extra continuation + error = UTF8_ERROR; + idx++; + return; + } + + // 2-byte + if ((buf[idx] & 0x20) == 0) { + // missing continuation + if (simdjson_unlikely(idx+1 > len || !is_continuation(buf[idx+1]))) { + if (idx+1 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 1100000_ 10______ + if (buf[idx] <= 0xc1) { error = UTF8_ERROR; } + idx += 2; + return; + } + + // 3-byte + if ((buf[idx] & 0x10) == 0) { + // missing continuation + if (simdjson_unlikely(idx+2 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11100000 100_____ ________ + if (buf[idx] == 0xe0 && buf[idx+1] <= 0x9f) { error = UTF8_ERROR; } + // surrogates: 
U+D800-U+DFFF 11101101 101_____ + if (buf[idx] == 0xed && buf[idx+1] >= 0xa0) { error = UTF8_ERROR; } + idx += 3; + return; + } + + // 4-byte + // missing continuation + if (simdjson_unlikely(idx+3 > len || !is_continuation(buf[idx+1]) || !is_continuation(buf[idx+2]) || !is_continuation(buf[idx+3]))) { + if (idx+2 > len && is_streaming(partial)) { idx = len; return; } + error = UTF8_ERROR; + idx++; + return; + } + // overlong: 11110000 1000____ ________ ________ + if (buf[idx] == 0xf0 && buf[idx+1] <= 0x8f) { error = UTF8_ERROR; } + // too large: > U+10FFFF: + // 11110100 (1001|101_)____ + // 1111(1___|011_|0101) 10______ + // also includes 5, 6, 7 and 8 byte characters: + // 11111___ + if (buf[idx] == 0xf4 && buf[idx+1] >= 0x90) { error = UTF8_ERROR; } + if (buf[idx] >= 0xf5) { error = UTF8_ERROR; } + idx += 4; +} + +// Returns true if the string is unclosed. +simdjson_inline bool validate_string() { + idx++; // skip first quote + while (idx < len && buf[idx] != '"') { + if (buf[idx] == '\\') { + idx += 2; + } else if (simdjson_unlikely(buf[idx] & 0x80)) { + validate_utf8_character(); + } else { + if (buf[idx] < 0x20) { error = UNESCAPED_CHARS; } + idx++; + } + } + if (idx >= len) { return true; } + return false; +} + +simdjson_inline bool is_whitespace_or_operator(uint8_t c) { + switch (c) { + case '{': case '}': case '[': case ']': case ',': case ':': + case ' ': case '\r': case '\n': case '\t': + return true; + default: + return false; + } +} + +// +// Parse the entire input in STEP_SIZE-byte chunks. +// +simdjson_inline error_code scan() { + bool unclosed_string = false; + for (;idx 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + parser.n_structural_indexes = new_structural_indexes; + } else if(partial == stage1_mode::streaming_final) { + if(unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. 
+ parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (parser.n_structural_indexes == 0) { return EMPTY; } + } else if(unclosed_string) { error = UNCLOSED_STRING; } + return error; +} + +private: + const uint8_t *buf; + uint32_t *next_structural_index; + dom_parser_implementation &parser; + uint32_t len; + uint32_t idx{0}; + error_code error{SUCCESS}; + stage1_mode partial; +}; // structural_scanner + +} // namespace stage1 +} // unnamed namespace + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode partial) noexcept { + this->buf = _buf; + this->len = _len; + stage1::structural_scanner scanner(*this, partial); + return scanner.scan(); +} + +// big table for the minifier +static uint8_t jump_table[256 * 3] = { + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, + 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, + 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, + 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, +}; + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + size_t i = 0, pos = 0; + uint8_t quote = 0; + uint8_t nonescape = 1; + + while (i < len) { + unsigned char c = buf[i]; + uint8_t *meta = jump_table + 3 * c; + + quote = quote ^ (meta[0] & nonescape); + dst[pos] = c; + pos += meta[2] | 
quote; + + i += 1; + nonescape = uint8_t(~nonescape) | (meta[1]); + } + dst_len = pos; // we intentionally do not work with a reference + // for fear of aliasing + return quote ? UNCLOSED_STRING : SUCCESS; +} + +// credit: based on code from Google Fuchsia (Apache Licensed) +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + const uint8_t *data = reinterpret_cast(buf); + uint64_t pos = 0; + uint32_t code_point = 0; + while (pos < len) { + // check of the next 8 bytes are ascii. + uint64_t next_pos = pos + 16; + if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii + uint64_t v1; + memcpy(&v1, data + pos, sizeof(uint64_t)); + uint64_t v2; + memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t)); + uint64_t v{v1 | v2}; + if ((v & 0x8080808080808080) == 0) { + pos = next_pos; + continue; + } + } + unsigned char byte = data[pos]; + if (byte < 0x80) { + pos++; + continue; + } else if ((byte & 0xe0) == 0xc0) { + next_pos = pos + 2; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x1f) << 6 | (data[pos + 1] & 0x3f); + if (code_point < 0x80 || 0x7ff < code_point) { return false; } + } else if ((byte & 0xf0) == 0xe0) { + next_pos = pos + 3; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + // range check + code_point = (byte & 0x0f) << 12 | + (data[pos + 1] & 0x3f) << 6 | + (data[pos + 2] & 0x3f); + if (code_point < 0x800 || 0xffff < code_point || + (0xd7ff < code_point && code_point < 0xe000)) { + return false; + } + } else if ((byte & 0xf8) == 0xf0) { // 0b11110000 + next_pos = pos + 4; + if (next_pos > len) { return false; } + if ((data[pos + 1] & 0xc0) != 0x80) { return false; } + if ((data[pos + 2] & 0xc0) != 0x80) { return false; } + if ((data[pos + 3] & 0xc0) != 0x80) { return false; } + // range check + code_point = + (byte & 0x07) << 18 | (data[pos + 1] & 0x3f) << 12 | + (data[pos + 2] & 0x3f) << 6 | (data[pos + 3] & 0x3f); + if (code_point <= 0xffff || 0x10ffff < code_point) { return false; } + } else { + // we may have a continuation + return false; + } + pos = next_pos; + } + return true; +} + +} // namespace fallback +} // namespace simdjson + +// +// Stage 2 +// + +namespace simdjson { +namespace fallback { + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return fallback::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return fallback::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace fallback +} // namespace simdjson + +/* including simdjson/fallback/end.h: #include */ +/* 
begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_SRC_FALLBACK_CPP +/* end file fallback.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_HASWELL +/* including haswell.cpp: #include */ +/* begin file haswell.cpp */ +#ifndef SIMDJSON_SRC_HASWELL_CPP +#define SIMDJSON_SRC_HASWELL_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/haswell.h: #include */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. 
These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
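+  // Roughly speaking (illustrative sketch only, not part of simdjson), the scalar
+  // equivalent of the value returned below for the first eight ASCII digits is:
+  //
+  //   uint32_t result = 0;
+  //   for (int i = 0; i < 8; i++) { result = result * 10 + uint32_t(chars[i] - '0'); }
+  //
+  // The SIMD version instead folds adjacent digit pairs with multiply-add steps.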
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
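+  // Hedged orientation note: each simd8<T> below wraps a single 256-bit AVX2
+  // register holding 32 bytes, and simd8x64<T> further down pairs two such
+  // registers to cover a full 64-byte block (see its static_assert).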
+ template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the 
same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
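+    // In scalar terms (rough sketch, not part of simdjson), the overall effect of
+    // compress() described above is:
+    //
+    //   for (int i = 0, j = 0; i < 32; i++) {
+    //     if (!(mask & (uint32_t(1) << i))) { output[j++] = bytes[i]; }
+    //   }
+    //
+    // i.e. bytes whose mask bit is 0 are kept, in order, and packed to the front.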
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + 
v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
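+// bs_bits and quote_bits are 32-bit masks: bit i is set when byte i of the just-copied 32-byte
+// block is a backslash or a quote, respectively (see copy_and_find below). For example
+// (illustrative), a block whose first four bytes are a, b, \ , " sets bit 2 of bs_bits and bit 3 of
+// quote_bits, so backslash_index() is 2, quote_index() is 3, and has_quote_first() is false because
+// the backslash precedes the quote.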
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly 
speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. 
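+// Conceptually, stage 2 keeps one open_container per nesting level: tape_index records where the
+// scope's opening entry was written on the tape (so it can be patched when the scope closes), and
+// count tracks how many elements have been seen in that scope so far. Packing the two 32-bit fields
+// keeps each per-depth stack entry at 64 bits, as the static_assert below checks.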
+struct open_container {
+  uint32_t tape_index; // where, on the tape, does the scope ([,{) begin
+  uint32_t count; // how many elements in the scope
+}; // struct open_container
+
+static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
+
+class dom_parser_implementation final : public internal::dom_parser_implementation {
+public:
+  /** Tape location of each open { or [ */
+  std::unique_ptr<open_container[]> open_containers{};
+  /** Whether each open container is a [ or { */
+  std::unique_ptr<bool[]> is_array{};
+  /** Buffer passed to stage 1 */
+  const uint8_t *buf{};
+  /** Length passed to stage 1 */
+  size_t len{0};
+  /** Document passed to stage 2 */
+  dom::document *doc{};
+
+  inline dom_parser_implementation() noexcept;
+  inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
+  inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
+  dom_parser_implementation(const dom_parser_implementation &) = delete;
+  dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
+
+  simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final;
+  simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
+  simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final;
+  simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final;
+  inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
+  inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
+private:
+  simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
+
+};
+
+} // namespace haswell
+} // namespace simdjson
+
+namespace simdjson {
+namespace haswell {
+
+inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
+inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
+inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
+
+// Leaving these here so they can be inlined if so desired
+inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
+  if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
+  // Stage 1 index output
+  size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
+  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
+  if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
+  structural_indexes[0] = 0;
+  n_structural_indexes = 0;
+
+  _capacity = capacity;
+  return SUCCESS;
+}
+
+inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
+  // Stage 2 stacks
+  open_containers.reset(new (std::nothrow) open_container[max_depth]);
+  is_array.reset(new (std::nothrow) bool[max_depth]);
+  if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
+
+  _max_depth = max_depth;
+  return SUCCESS;
+}
+
+} // namespace haswell
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
+/* end file simdjson/generic/dom_parser_implementation.h for haswell */
+/* including
simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat
+    // more complicated.
+    //
+    // There is an extra layer of complexity in that we need more than 55 bits of
+    // accuracy in the round-to-even scenario.
+    //
+    // The full_multiplication function computes the 128-bit product of two 64-bit words
+    // with a returned value of type value128 with a "low component" corresponding to the
+    // 64-bit least significant bits of the product and with a "high component" corresponding
+    // to the 64-bit most significant bits of the product.
+    simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]);
+    firstproduct.low += secondproduct.high;
+    if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
+    // At this point, we might need to add at most one to firstproduct, but this
+    // can only change the value of firstproduct.high if firstproduct.low is maximal.
+    if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) {
+      // This is very unlikely, but if so, we need to do much more work!
+      return false;
+    }
+  }
+  uint64_t lower = firstproduct.low;
+  uint64_t upper = firstproduct.high;
+  // The final mantissa should be 53 bits with a leading 1.
+  // We shift it so that it occupies 54 bits with a leading 1.
+  ///////
+  uint64_t upperbit = upper >> 63;
+  uint64_t mantissa = upper >> (upperbit + 9);
+  lz += int(1 ^ upperbit);
+
+  // Here we have mantissa < (1<<54).
+  int64_t real_exponent = exponent - lz;
+  if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal?
+    // Here we have that real_exponent <= 0 so -real_exponent >= 0
+    if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
+      d = negative ? -0.0 : 0.0;
+      return true;
+    }
+    // next line is safe because -real_exponent + 1 < 64
+    mantissa >>= -real_exponent + 1;
+    // Thankfully, we can't have both "round-to-even" and subnormals because
+    // "round-to-even" only occurs for powers close to 0.
+    mantissa += (mantissa & 1); // round up
+    mantissa >>= 1;
+    // There is a weird scenario where we don't have a subnormal but just barely.
+    // Suppose we start with 2.2250738585072013e-308, we end up
+    // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
+    // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
+    // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
+    // subnormal, but we can only know this after rounding.
+    // So we only declare a subnormal if we are smaller than the threshold.
+    real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
+    d = to_double(mantissa, real_exponent, negative);
+    return true;
+  }
+  // We have to round to even. The "to even" part
+  // is only a problem when we are right in between two floats
+  // which we guard against.
+  // If we have lots of trailing zeros, we may fall right between two
+  // floating-point values.
+  //
+  // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
+  // times a power of two. That is, it is right between a number with binary significand
+  // m and another number with binary significand m+1; and it must be the case
+  // that it cannot be represented by a float itself.
+  //
+  // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p.
+  // Recall that 10^q = 5^q * 2^q.
+  // When q >= 0, we must have that (2m+1) is divisible by 5^q, so 5^q <= 2^54. We have that
+  // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23.
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
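+//
+// As a rough illustration of the flow below: for the input "-123.456e-7 ", parse_number sees the
+// leading '-', accumulates the integer digits into i = 123, then parse_decimal_after_separator
+// extends i to 123456 and sets exponent = -3 (three fractional digits), parse_exponent adds the
+// explicit -7 giving exponent = -10, and write_float converts i = 123456 with exponent = -10 (and
+// the minus sign) into the closest double to -1.23456e-5. Purely integral inputs never set is_float
+// and are written with WRITE_INTEGER / WRITE_UNSIGNED instead.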
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
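// A standalone sketch (not simdjson code) of the 20-digit overflow rule relied on by the
// parse_unsigned routines: accumulating i = i*10 + digit wraps modulo 2^64, and a 20-digit
// input starting with '1' that wraps always lands well below 10,000,000,000,000,000,000,
// so "i <= INT64_MAX" is enough to recognize the wrap.
#include <cassert>
#include <cstdint>

static uint64_t accumulate(const char *s) {
  uint64_t i = 0;
  for (; *s; ++s) { i = i * 10 + uint64_t(*s - '0'); } // wraps on overflow, like the parser
  return i;
}

int main() {
  // Largest uint64_t: 18,446,744,073,709,551,615 -- 20 digits, starts with '1', no wrap.
  assert(accumulate("18446744073709551615") == UINT64_MAX);
  // One more wraps to 0, which is <= INT64_MAX, so the parser rejects it.
  assert(accumulate("18446744073709551616") <= uint64_t(INT64_MAX));
  // A 20-digit number starting with '2'..'9' always overflows; the parser rejects those
  // by looking at the leading digit, and the wrapped value is indeed small.
  assert(accumulate("20000000000000000000") == 1553255926290448384ULL);
}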
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
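// A standalone sketch (not simdjson code) of the two ideas the parse_integer routines combine:
// the single range check "magnitude <= INT64_MAX + (negative ? 1 : 0)" covers both ends of the
// int64_t range, and a valid negative value is then produced with the two's-complement identity
// -m == ~m + 1 applied to the unsigned magnitude. Names below are illustrative only.
#include <cassert>
#include <cstdint>

static bool parse_ok(uint64_t magnitude, bool negative, int64_t *out) {
  if (magnitude > uint64_t(INT64_MAX) + uint64_t(negative)) { return false; }
  // The unsigned-to-signed conversion is the same one the parser itself relies on.
  *out = negative ? int64_t(~magnitude + 1) : int64_t(magnitude);
  return true;
}

int main() {
  int64_t v;
  assert(parse_ok(9223372036854775807ULL, false, &v) && v == INT64_MAX);
  assert(parse_ok(9223372036854775808ULL, true,  &v) && v == INT64_MIN);
  assert(!parse_ok(9223372036854775808ULL, false, &v)); // +2^63 does not fit
  assert(!parse_ok(9223372036854775809ULL, true,  &v)); // -(2^63 + 1) does not fit
}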
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
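// A standalone sketch (not simdjson code) of the classification rule used in get_number_type:
// a non-negative integer is reported as unsigned only when it needs 20 digits, or when it has
// exactly 19 digits and compares >= "9223372036854775808" (that is, INT64_MAX + 1) as a byte
// string. Names below are illustrative only.
#include <cstring>
#include <cstdio>

static const char *classify_nonnegative_integer(const char *digits) {
  size_t digit_count = std::strlen(digits);
  if (digit_count >= 19) {
    const char *smallest_big_integer = "9223372036854775808";
    if (digit_count >= 20 || std::memcmp(digits, smallest_big_integer, 19) >= 0) {
      return "unsigned_integer";
    }
  }
  return "signed_integer";
}

int main() {
  std::printf("%s\n", classify_nonnegative_integer("9223372036854775807")); // signed_integer
  std::printf("%s\n", classify_nonnegative_integer("9223372036854775808")); // unsigned_integer
  std::printf("%s\n", classify_nonnegative_integer("123"));                 // signed_integer
}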
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
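// A standalone illustration (not simdjson code) of the mantissa/exponent bookkeeping used by
// the parse_double routines: digits before and after the '.' are accumulated into one integer
// i, the exponent starts at -(number of fractional digits), the explicit exponent is added on
// top, and the value is i * 10^exponent. The real code assembles the double with
// compute_float_64 (with a string fallback); std::pow is used here only to show the identity.
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  // "3.25e2": integer part "3", fractional digits "25", exponent field "2".
  uint64_t i = 325;      // all digits with the '.' removed
  int64_t exponent = -2; // two fractional digits
  exponent += 2;         // explicit exponent "e2"
  double value = double(i) * std::pow(10.0, double(exponent));
  assert(value == 325.0); // 3.25e2 == 325
}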
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for haswell */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +/* including simdjson/haswell/implementation.h: #include */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + 
std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ + +/* including simdjson/haswell/begin.h: #include */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. 
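// A standalone illustration (not simdjson code) of the fallback defined just below:
// (n - 1) & n clears the lowest set bit of n, which is what the BMI1 blsr instruction computes.
#include <cassert>
#include <cstdint>

static uint64_t clear_lowest_set_bit(uint64_t n) { return (n - 1) & n; }

int main() {
  assert(clear_lowest_set_bit(0b1011000) == 0b1010000);
  assert(clear_lowest_set_bit(0x8000000000000000ULL) == 0);
  assert(clear_lowest_set_bit(0) == 0); // n - 1 wraps around, and the AND still yields 0
}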
+#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
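// A standalone illustration (not simdjson code) of what trailing_zeroes computes: the index of
// the lowest set bit of a 64-bit mask, which this kernel uses to turn match bitmasks back into
// byte positions. The loop below is the portable, slow equivalent of the tzcnt intrinsic.
#include <cassert>
#include <cstdint>

static int portable_trailing_zeroes(uint64_t x) {
  int n = 0;
  while ((x & 1) == 0) { x >>= 1; ++n; } // caller must ensure x != 0
  return n;
}

int main() {
  assert(portable_trailing_zeroes(0b1011000) == 3);
  assert(portable_trailing_zeroes(1) == 0);
  assert(portable_trailing_zeroes(0x8000000000000000ULL) == 63);
}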
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
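// A standalone scalar sketch (not simdjson code) of the pairwise reduction the SSE code below
// performs with _mm_maddubs_epi16 / _mm_madd_epi16: digits are combined two at a time (weights
// 10,1), then four at a time (100,1), then eight at a time (10000,1).
#include <cassert>
#include <cstdint>

static uint32_t parse_eight_digits_scalar(const char *chars) {
  uint32_t pairs[4];
  for (int j = 0; j < 4; j++) {
    pairs[j] = uint32_t(chars[2 * j] - '0') * 10 + uint32_t(chars[2 * j + 1] - '0');
  }
  uint32_t quads[2] = { pairs[0] * 100 + pairs[1], pairs[2] * 100 + pairs[3] };
  return quads[0] * 10000 + quads[1];
}

int main() {
  assert(parse_eight_digits_scalar("12345678") == 12345678u);
  assert(parse_eight_digits_scalar("00000042") == 42u);
}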
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
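// A standalone illustration (not simdjson code) of what full_multiplication above returns: the
// full 128-bit product of two 64-bit operands, split into low and high halves. The number
// parsing code needs the full product when assembling binary64 values. __uint128_t is a
// GCC/Clang extension, as in the non-MSVC path above; the struct name here is illustrative.
#include <cassert>
#include <cstdint>

struct u128_parts { uint64_t low, high; };

static u128_parts full_mul_sketch(uint64_t a, uint64_t b) {
  __uint128_t r = static_cast<__uint128_t>(a) * b;
  return { uint64_t(r), uint64_t(r >> 64) };
}

int main() {
  u128_parts r = full_mul_sketch(UINT64_MAX, UINT64_MAX);
  // (2^64 - 1)^2 = 2^128 - 2^65 + 1, so the low half is 1 and the high half is 2^64 - 2.
  assert(r.low == 1 && r.high == UINT64_MAX - 1);
}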
+ template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the 
same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
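// A standalone scalar sketch (not simdjson code) of the contract documented above for
// compress(): bytes whose mask bit is 0 are written to the output in order, bytes whose bit is
// 1 are dropped. The AVX2 code achieves the same result with table-driven shuffles instead of
// a per-byte loop.
#include <cassert>
#include <cstdint>
#include <cstring>

static size_t compress_scalar(const uint8_t *in, uint32_t mask, uint8_t *out, size_t n) {
  size_t written = 0;
  for (size_t i = 0; i < n; i++) {
    if (!(mask & (uint32_t(1) << i))) { out[written++] = in[i]; } // keep bytes with a 0 bit
  }
  return written;
}

int main() {
  const uint8_t in[8] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' };
  uint8_t out[8];
  // Drop bytes 1, 2 and 5 (mask bits set), keep the rest.
  size_t n = compress_scalar(in, 0b00100110, out, 8);
  assert(n == 5 && std::memcmp(out, "adegh", 5) == 0);
}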
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + 
v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
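// A standalone illustration (not simdjson code) of how simd8x64<T>::to_bitmask above combines
// its two 32-byte chunks: each chunk contributes one bit per byte, and the second chunk's bits
// are shifted into the upper half of a 64-bit mask.
#include <cassert>
#include <cstdint>

static uint64_t combine_chunk_masks(uint32_t lo, uint32_t hi) {
  return uint64_t(lo) | (uint64_t(hi) << 32);
}

int main() {
  // Suppose byte 3 of the first chunk and byte 1 of the second chunk matched.
  uint64_t mask = combine_chunk_masks(1u << 3, 1u << 1);
  assert(mask == ((uint64_t(1) << 3) | (uint64_t(1) << 33)));
}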
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including generic/amalgamated.h for haswell: #include */ +/* begin file generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for haswell: #include */ +/* begin file generic/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for haswell */ +/* including generic/dom_parser_implementation.h for haswell: #include */ +/* begin file generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace haswell { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for haswell */ +/* including generic/json_character_block.h for haswell: #include */ +/* begin file 
generic/json_character_block.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for haswell */ +/* end file generic/amalgamated.h for haswell */ +/* including generic/stage1/amalgamated.h for haswell: #include */ +/* begin file generic/stage1/amalgamated.h for haswell */ +// Stuff other things depend on +/* including generic/stage1/base.h for haswell: #include */ +/* begin file generic/stage1/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for haswell */ +/* including generic/stage1/buf_block_reader.h for haswell: #include */ +/* begin file generic/stage1/buf_block_reader.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. 
In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for haswell */ +/* including generic/stage1/json_escape_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_escape_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). 
+ */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. 
XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for haswell */ +/* including generic/stage1/json_string_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_string_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + 
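The branchless odd/even backslash-run computation in json_escape_scanner above is easier to trust next to a plain scalar reference. The sketch below computes the same "escaped" mask one character at a time and carries the cross-block state the same way; escaped_mask_scalar is an illustrative name and the snippet is not part of the patch.

#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar reference for json_escape_scanner::next(): bit i is set iff input[i]
// is escaped, i.e. preceded by an odd-length run of backslashes. prev_escaped
// carries the "next character is escaped" state across 64-byte blocks.
static uint64_t escaped_mask_scalar(const char input[64], uint64_t &prev_escaped) {
  uint64_t escaped = 0;
  bool pending = prev_escaped != 0;   // did the previous block end with an active escape?
  for (int i = 0; i < 64; i++) {
    if (pending) {                    // this character is escaped, whatever it is
      escaped |= uint64_t(1) << i;
      pending = false;
    } else if (input[i] == '\\') {
      pending = true;                 // the next character will be escaped
    }
  }
  prev_escaped = pending ? 1 : 0;
  return escaped;
}

int main() {
  char block[64];
  std::memset(block, ' ', sizeof(block));
  std::memcpy(block, "\\n \\\\n \\\\\\n", 11);   // the inputs \n  \\n  \\\n from the diagrams above
  uint64_t carry = 0;
  std::printf("escaped: %016llx\n", (unsigned long long)escaped_mask_scalar(block, carry));
}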
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. 
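prefix_xor() is the step that turns the unescaped-quote mask into the in-string mask used above. A portable scalar sketch of the same computation follows; simdjson's haswell kernel uses a carry-less multiply instead, and prefix_xor64 is an illustrative name rather than the library's function.

#include <cstdint>
#include <cstdio>

// Each output bit i is the XOR of input bits 0..i (a parallel prefix XOR).
// Applied to the quote mask, this turns on the bits between pairs of quotes,
// including the opening quote and excluding the closing quote.
static uint64_t prefix_xor64(uint64_t x) {
  x ^= x << 1;  x ^= x << 2;  x ^= x << 4;
  x ^= x << 8;  x ^= x << 16; x ^= x << 32;
  return x;
}

int main() {
  // {"a":"b\"c"}  -> unescaped quotes at offsets 1, 3, 5, 10
  uint64_t quote = (1ULL << 1) | (1ULL << 3) | (1ULL << 5) | (1ULL << 10);
  uint64_t prev_in_string = 0;                            // carry from the previous block
  uint64_t in_string = prefix_xor64(quote) ^ prev_in_string;
  prev_in_string = uint64_t(int64_t(in_string) >> 63);    // all 1s if the block ends inside a string
  std::printf("in_string: %016llx\n", (unsigned long long)in_string);
}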
+ return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for haswell */ +/* including generic/stage1/utf8_lookup4_algorithm.h for haswell: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
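The three lookup_16 tables in check_special_cases encode, per pair of adjacent bytes, the same error classes (too short, too long, overlong, surrogate, out of range) that a plain scalar validator checks one code point at a time. The scalar reference below is offered only as a reading aid for those bit flags; validate_utf8_scalar is an illustrative name, not simdjson's API.

#include <cstdint>
#include <cstddef>

// Returns true iff s[0..len) is well-formed UTF-8.
static bool validate_utf8_scalar(const uint8_t *s, size_t len) {
  size_t i = 0;
  while (i < len) {
    uint8_t b = s[i];
    if (b < 0x80) { i++; continue; }                     // ASCII fast path
    size_t n; uint32_t cp, min;
    if ((b & 0xE0) == 0xC0)      { n = 2; cp = b & 0x1F; min = 0x80; }
    else if ((b & 0xF0) == 0xE0) { n = 3; cp = b & 0x0F; min = 0x800; }
    else if ((b & 0xF8) == 0xF0) { n = 4; cp = b & 0x07; min = 0x10000; }
    else return false;                                   // stray continuation or invalid lead (TOO_SHORT / TWO_CONTS)
    if (i + n > len) return false;                       // truncated sequence at EOF
    for (size_t k = 1; k < n; k++) {
      if ((s[i + k] & 0xC0) != 0x80) return false;       // missing continuation byte
      cp = (cp << 6) | (s[i + k] & 0x3F);
    }
    if (cp < min) return false;                          // overlong encoding (OVERLONG_2/3/4)
    if (cp > 0x10FFFF) return false;                     // beyond Unicode range (TOO_LARGE)
    if (cp >= 0xD800 && cp <= 0xDFFF) return false;      // UTF-16 surrogate (SURROGATE)
    i += n;
  }
  return true;
}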
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for haswell */ +/* including generic/stage1/json_scanner.h for haswell: #include */ +/* begin file generic/stage1/json_scanner.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). 
+ * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for haswell */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for haswell: #include */ +/* begin file generic/stage1/find_next_document_index.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
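The backward scan described above is compact but easy to misread, so here is a simplified stand-alone version of the same rule (pair test plus bracket counting) applied to a hard-coded batch. last_complete_document is an illustrative helper, not simdjson code, and it omits the extra bookkeeping the real function performs once the loop reaches the first index.

#include <cstdio>
#include <vector>

// Walk the structural offsets backwards, keep a running bracket balance, and
// stop at the first place where one value ends (not '{' '[' ':' ',') and the
// next one starts (not '}' ']' ':' ',').
static size_t last_complete_document(const char *buf, const std::vector<size_t> &idx) {
  int arr = 0, obj = 0;
  for (size_t i = idx.size(); i-- > 1; ) {
    char b = buf[idx[i]];
    if (b == ':' || b == ',') continue;
    if (b == '}') { obj--; continue; }
    if (b == ']') { arr--; continue; }
    if (b == '{') obj++;
    if (b == '[') arr++;
    char a = buf[idx[i - 1]];
    if (a == '{' || a == '[' || a == ':' || a == ',') continue;
    // idx[i-1] ends a document and idx[i] starts the next one.
    return (arr == 0 && obj == 0) ? idx.size() : i;
  }
  return 0;   // could not prove that any complete document is present
}

int main() {
  const char *json = "{\"a\":1} {\"b\":";                 // the second document is truncated
  std::vector<size_t> idx = {0, 1, 4, 5, 6, 8, 9, 12};    // {  "a"  :  1  }  {  "b"  :
  // Prints 5: only the first five structural indexes (the complete {"a":1}) are kept.
  std::printf("%zu\n", last_complete_document(json, idx));
}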
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for haswell */ +/* including generic/stage1/json_minifier.h for haswell: #include */ +/* begin file generic/stage1/json_minifier.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to 
write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for haswell */ +/* including generic/stage1/json_structural_indexer.h for haswell: #include */ +/* begin file generic/stage1/json_structural_indexer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. 
However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
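Both branches of bit_indexer::write() above do the same job: turn a 64-bit structural mask into absolute byte offsets appended to the index buffer. A compact scalar sketch of that flattening follows; flatten_bits is an illustrative name, it relies on GCC/Clang's __builtin_ctzll, and it skips the group-of-8 unrolling and deliberate over-writing the real code uses to keep branch mispredictions rare.

#include <cstdint>
#include <cstdio>

// Append the position (idx + bit number) of every set bit in 'bits' to 'tail'
// and return the new tail.
static uint32_t *flatten_bits(uint32_t *tail, uint32_t idx, uint64_t bits) {
  while (bits != 0) {
    *tail++ = idx + (uint32_t)__builtin_ctzll(bits);  // offset of the lowest set bit
    bits &= bits - 1;                                 // clear the lowest set bit
  }
  return tail;
}

int main() {
  uint32_t out[64];
  // Structurals at offsets 0, 5 and 63 of the 64-byte block starting at byte 128.
  uint64_t mask = (1ULL << 0) | (1ULL << 5) | (1ULL << 63);
  uint32_t *end = flatten_bits(out, 128, mask);
  for (uint32_t *p = out; p != end; ++p) std::printf("%u ", *p);  // 128 133 191
  std::printf("\n");
}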
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. 
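trim_partial_utf8 above only needs to look at the last three bytes because a UTF-8 lead byte announces its own length: values of 0xC0 and above start a 2-byte or longer sequence, 0xE0 and above at least 3 bytes, 0xF0 and above 4 bytes. A small stand-alone sketch of the same rule, not part of the patch, with trim_tail as an illustrative name (it assumes len >= 3, like the fast path above):

#include <cstdint>
#include <cstdio>

// Drop a trailing multibyte character that cannot have been completed inside
// this buffer, so the next streamed chunk can supply it whole.
static size_t trim_tail(const uint8_t *buf, size_t len) {
  if (buf[len - 1] >= 0xC0) return len - 1;  // lead byte with none of its continuations
  if (buf[len - 2] >= 0xE0) return len - 2;  // 3/4-byte lead with only 1 continuation
  if (buf[len - 3] >= 0xF0) return len - 3;  // 4-byte lead with only 2 continuations
  return len;
}

int main() {
  // 'a' followed by the first three bytes of U+1F600 (F0 9F 98 80): the last
  // character is incomplete, so streaming should defer it to the next chunk.
  const uint8_t buf[] = {'a', 0xF0, 0x9F, 0x98};
  std::printf("keep %zu of %zu bytes\n", trim_tail(buf, sizeof(buf)), sizeof(buf));  // keep 1 of 4
}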
+ if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. 
If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. 
+ return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for haswell */ +/* including generic/stage1/utf8_validator.h for haswell: #include */ +/* begin file generic/stage1/utf8_validator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for haswell */ +/* end file generic/stage1/amalgamated.h for haswell */ +/* including generic/stage2/amalgamated.h for haswell: #include */ +/* begin file generic/stage2/amalgamated.h for haswell */ +// Stuff other things depend on +/* including generic/stage2/base.h for haswell: #include */ +/* begin file generic/stage2/base.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for haswell */ +/* including generic/stage2/tape_writer.h for haswell: #include */ +/* begin file generic/stage2/tape_writer.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): 
#endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for haswell */ +/* including generic/stage2/logger.h for haswell: #include */ +/* begin file generic/stage2/logger.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace haswell { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
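+  // For a rough sense of the format (illustrative example, not actual captured output),
+  // the table printed by log_start() and log_line() below looks like:
+  //
+  //   | Event                | Buffer                         | Next       | Next# | Detail |
+  //   | +object              | {"a":1}                        | "a":1}     |     1 |        |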
+
+  // Helper to turn unprintable or newline characters into spaces
+  static simdjson_inline char printable_char(char c) {
+    if (c >= 0x20) {
+      return c;
+    } else {
+      return ' ';
+    }
+  }
+
+  // Print the header and set up log_start
+  static simdjson_inline void log_start() {
+    if (LOG_ENABLED) {
+      log_depth = 0;
+      printf("\n");
+      printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#");
+      printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES);
+    }
+  }
+
+  simdjson_unused static simdjson_inline void log_string(const char *message) {
+    if (LOG_ENABLED) {
+      printf("%s\n", message);
+    }
+  }
+
+  // Logs a single line from the stage 2 DOM parser
+  template<typename S>
+  static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) {
+    if (LOG_ENABLED) {
+      printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title);
+      auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1;
+      auto next_index = structurals.next_structural;
+      auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast<const uint8_t*>(" ");
+      auto next = &structurals.buf[*next_index];
+      {
+        // Print the next N characters in the buffer.
+        printf("| ");
+        // Otherwise, print the characters starting from the buffer position.
+        // Print spaces for unprintable or newline characters.
+        for (int i=0;i<LOG_BUFFER_LEN;i++) {
+          printf("%c", printable_char(current[i]));
+        }
+        printf(" ");
+        // Print the next N characters in the buffer.
+        printf("| ");
+        // Otherwise, print the characters starting from the buffer position.
+        // Print spaces for unprintable or newline characters.
+        for (int i=0;i<LOG_SMALL_BUFFER_LEN;i++) {
+          printf("%c", printable_char(next[i]));
+        }
+        printf(" ");
+      }
+      printf("| %*u ", LOG_INDEX_LEN, *(structurals.next_structural));
+      printf("| %-s ", detail);
+      printf("|\n");
+    }
+  }
+
+} // namespace logger
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H
+/* end file generic/stage2/logger.h for haswell */
+/* including generic/stage2/json_iterator.h for haswell: #include <generic/stage2/json_iterator.h> */
+/* begin file generic/stage2/json_iterator.h for haswell */
+#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #include */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace haswell {
+namespace {
+namespace stage2 {
+
+class json_iterator {
+public:
+  const uint8_t* const buf;
+  uint32_t *next_structural;
+  dom_parser_implementation &dom_parser;
+  uint32_t depth{0};
+
+  /**
+   * Walk the JSON document.
+   *
+   * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as
+   * the first parameter; some callbacks have other parameters as well:
+   *
+   * - visit_document_start() - at the beginning.
+   * - visit_document_end() - at the end (if things were successful).
+   *
+   * - visit_array_start() - at the start `[` of a non-empty array.
+   * - visit_array_end() - at the end `]` of a non-empty array.
+   * - visit_empty_array() - when an empty array is encountered.
+   *
+   * - visit_object_start() - at the start `{` of a non-empty object.
+   * - visit_object_end() - at the end `}` of a non-empty object.
+   * - visit_empty_object() - when an empty object is encountered.
+   * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is
+   *   guaranteed to point at the first quote of the string (`"key"`).
+   * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null.
+   * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null.
+   *
+   * - increment_count(iter) - each time a value is found in an array or object.
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for haswell */ +/* including generic/stage2/stringparsing.h for haswell: #include */ +/* begin file generic/stage2/stringparsing.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
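+  // For example, the escaped pair "\uD83D\uDE00" encodes U+1F600 (a code point outside
+  // the BMP); the arithmetic below recovers it as
+  //   ((0xD83D - 0xd800) << 10 | (0xDE00 - 0xdc00)) + 0x10000 == 0x1F600.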
+  if (code_point >= 0xd800 && code_point < 0xdc00) {
+    const uint8_t *src_data = *src_ptr;
+    /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */
+    if (((src_data[0] << 8) | src_data[1]) != ((static_cast<uint8_t>('\\') << 8) | static_cast<uint8_t>('u'))) {
+      if(!allow_replacement) { return false; }
+      code_point = substitution_code_point;
+    } else {
+      uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2);
+
+      // We have already checked that the high surrogate is valid and
+      // (code_point - 0xd800) < 1024.
+      //
+      // Check that code_point_2 is in the range 0xdc00..0xdfff
+      // and that code_point_2 was parsed from valid hex.
+      uint32_t low_bit = code_point_2 - 0xdc00;
+      if (low_bit >> 10) {
+        if(!allow_replacement) { return false; }
+        code_point = substitution_code_point;
+      } else {
+        code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000;
+        *src_ptr += 6;
+      }
+
+    }
+  } else if (code_point >= 0xdc00 && code_point <= 0xdfff) {
+    // If we encounter a low surrogate (not preceded by a high surrogate)
+    // then we have an error.
+    if(!allow_replacement) { return false; }
+    code_point = substitution_code_point;
+  }
+  size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr);
+  *dst_ptr += offset;
+  return offset > 0;
+}
+
+
+// handle a unicode codepoint using the wobbly convention
+// https://simonsapin.github.io/wtf-8/
+// write appropriate values into dest
+// src will advance 6 bytes or 12 bytes
+// dest will advance a variable amount (return via pointer)
+// return true if the unicode codepoint was valid
+// We work in little-endian then swap at write time
+simdjson_warn_unused
+simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr,
+                                                     uint8_t **dst_ptr) {
+  // It is not ideal that this function is nearly identical to handle_unicode_codepoint.
+  //
+  // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
+  // conversion isn't valid; we defer the check for this to inside the
+  // multilingual plane check
+  uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2);
+  *src_ptr += 6;
+  // If we found a high surrogate, we must
+  // check for low surrogate for characters
+  // outside the Basic
+  // Multilingual Plane.
+  if (code_point >= 0xd800 && code_point < 0xdc00) {
+    const uint8_t *src_data = *src_ptr;
+    /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */
+    if (((src_data[0] << 8) | src_data[1]) == ((static_cast<uint8_t>('\\') << 8) | static_cast<uint8_t>('u'))) {
+      uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2);
+      uint32_t low_bit = code_point_2 - 0xdc00;
+      if ((low_bit >> 10) == 0) {
+        code_point =
+            (((code_point - 0xd800) << 10) | low_bit) + 0x10000;
+        *src_ptr += 6;
+      }
+    }
+  }
+
+  size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr);
+  *dst_ptr += offset;
+  return offset > 0;
+}
+
+
+/**
+ * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
+ * must be an unescaped quote terminating the string. It returns the final output
+ * position as pointer. In case of error (e.g., the string has bad escaped codes),
+ * then nullptr is returned. It is assumed that the output buffer is large
+ * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
+ * SIMDJSON_PADDING bytes.
+ */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for haswell */ +/* including generic/stage2/structural_iterator.h for haswell: #include */ +/* begin file generic/stage2/structural_iterator.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for haswell */ +/* including generic/stage2/tape_builder.h for haswell: #include */ +/* begin file generic/stage2/tape_builder.h for haswell */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace haswell { 
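+// A rough summary of the tape layout produced by this builder (as can be read off the code
+// below): every tape element is a single 64-bit word holding the entry type in its top 8 bits
+// and a 56-bit payload. Containers are written twice: the end element points back at the
+// start, while the start element points just past the end and carries the element count,
+// saturated at 0xFFFFFF, in bits 32..55. Strings live in a separate string buffer as a
+// 4-byte length prefix, the unescaped bytes, and a trailing NUL byte; the tape payload for
+// a string is the offset of that prefix.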
+namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. 
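+  // (Here value+1 skips the opening quote, and dst, as returned by on_start_string() above,
+  // already points past the 4-byte length prefix that on_end_string() fills in afterwards.)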
+  if (dst == nullptr) {
+    iter.log_error("Invalid escape in string");
+    return STRING_ERROR;
+  }
+  on_end_string(dst);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept {
+  return visit_string(iter, value);
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("number");
+  return numberparsing::parse_number(value, tape);
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept {
+  //
+  // We need to make a copy to make sure that the string is space terminated.
+  // This is not about padding the input, which should already be padded up
+  // to len + SIMDJSON_PADDING. However, we have no control at this stage
+  // on how the padding was done. What if the input string was padded with nulls?
+  // It is quite common for an input string to have an extra null character (C string).
+  // We do not want to allow 9\0 (where \0 is the null character) inside a JSON
+  // document, but the string "9\0" by itself is fine. So we make a copy and
+  // pad the input with spaces when we know that there is just one input element.
+  // This copy is relatively expensive, but it will almost never be called in
+  // practice unless you are in the strange scenario where you have many JSON
+  // documents made of single atoms.
+  //
+  std::unique_ptr<uint8_t[]> copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
+  if (copy.get() == nullptr) { return MEMALLOC; }
+  std::memcpy(copy.get(), value, iter.remaining_len());
+  std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
+  error_code error = visit_number(iter, copy.get());
+  return error;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("true");
+  if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
+  tape.append(0, internal::tape_type::TRUE_VALUE);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("true");
+  if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
+  tape.append(0, internal::tape_type::TRUE_VALUE);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("false");
+  if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
+  tape.append(0, internal::tape_type::FALSE_VALUE);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("false");
+  if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
+  tape.append(0, internal::tape_type::FALSE_VALUE);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept {
+  iter.log_value("null");
+  if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
+  tape.append(0, internal::tape_type::NULL_VALUE);
+  return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t
*value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost
+  *dst = 0;
+  current_string_buf_loc = dst + 1;
+}
+
+} // namespace stage2
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H
+/* end file generic/stage2/tape_builder.h for haswell */
+/* end file generic/stage2/amalgamated.h for haswell */
+
+//
+// Stage 1
+//
+
+namespace simdjson {
+namespace haswell {
+
+simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
+  size_t capacity,
+  size_t max_depth,
+  std::unique_ptr<internal::dom_parser_implementation>& dst
+) const noexcept {
+  dst.reset( new (std::nothrow) dom_parser_implementation() );
+  if (!dst) { return MEMALLOC; }
+  if (auto err = dst->set_capacity(capacity))
+    return err;
+  if (auto err = dst->set_max_depth(max_depth))
+    return err;
+  return SUCCESS;
+}
+
+namespace {
+
+using namespace simd;
+
+// This identifies structural characters (comma, colon, braces, brackets),
+// and ASCII white-space ('\r','\n','\t',' ').
+simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
+  // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
+  // we can't use the generic lookup_16.
+  const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
+
+  // The 6 operators (:,[]{}) have these values:
+  //
+  // , 2C
+  // : 3A
+  // [ 5B
+  // { 7B
+  // ] 5D
+  // } 7D
+  //
+  // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
+  // We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
+  // match it (against | 0x20).
+  //
+  // To prevent recognizing other characters, everything else gets compared with 0, which cannot
+  // match due to the | 0x20.
+  //
+  // NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
+  // and :. This gets caught in stage 2, which checks the actual character to ensure the right
+  // operators are in the right places.
+  const auto op_table = simd8<uint8_t>::repeat_16(
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 0, ':', '{',  // : = 3A, [ = 5B, { = 7B
+    ',', '}', 0, 0   // , = 2C, ] = 5D, } = 7D
+  );
+
+  // We compute whitespace and op separately. If later code only uses one or the
+  // other, given the fact that all functions are aggressively inlined, we can
+  // hope that useless computations will be omitted. This is namely the case when
+  // minifying (we only need whitespace).
+
+  const uint64_t whitespace = in.eq({
+    _mm256_shuffle_epi8(whitespace_table, in.chunks[0]),
+    _mm256_shuffle_epi8(whitespace_table, in.chunks[1])
+  });
+  // Turn [ and ] into { and }
+  const simd8x64<uint8_t> curlified{
+    in.chunks[0] | 0x20,
+    in.chunks[1] | 0x20
+  };
+  const uint64_t op = curlified.eq({
+    _mm256_shuffle_epi8(op_table, in.chunks[0]),
+    _mm256_shuffle_epi8(op_table, in.chunks[1])
+  });
+
+  return { whitespace, op };
+}
+
+simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input) {
+  return input.reduce_or().is_ascii();
+}
+
+simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
+  simd8<uint8_t> is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0
+  simd8<uint8_t> is_third_byte  = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
+  simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
+  // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
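+  // Worked example: if the previous byte (prev1) is a two-byte lead such as 0xC3, then
+  // saturating_sub gives 0xC3 - 0xBF == 0x04 > 0, so the current byte must be a continuation;
+  // an ASCII previous byte such as 0x41 ('A') saturates to 0 in all three subtractions.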
+ return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace haswell { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return haswell::stage1::json_minifier::minify<128>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return haswell::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return haswell::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return haswell::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return haswell::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace haswell +} // namespace simdjson + +/* including simdjson/haswell/end.h: #include */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_SRC_HASWELL_CPP +/* end file haswell.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_ICELAKE +/* including icelake.cpp: #include */ +/* begin file icelake.cpp */ +#ifndef SIMDJSON_SRC_ICELAKE_CPP +#define SIMDJSON_SRC_ICELAKE_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/icelake.h: #include */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define 
SIMDJSON_ICELAKE_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. 
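+// (For example, _blsr_u64(0b0110100) == 0b0110000: the lowest set bit is cleared, which is
+// exactly what the fallback macro below computes as ((n - 1) & n).)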
+#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
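// (Illustrative aside, not part of the simdjson sources: prefix_xor() in
// icelake/bitmask.h above computes a running XOR over the bits of its input.
// Carryless multiplication by an all-ones operand sums, modulo 2, every input
// bit at or below each output position, which is exactly the prefix XOR.)
//
//   input bits (LSB on the right): 0 0 1 0 0 1 0 0
//   prefix XOR                   : 0 0 0 1 1 1 0 0   i.e. prefix_xor(0b00100100) == 0b00011100
//
// (Each 1 in the input toggles the running parity, which is how a mask of
// quote positions can be turned into a mask of "inside a string" regions.)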
+ template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, 
v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, 
v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
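// (Illustrative aside, not part of the simdjson sources: the struct defined
// just below compares 64 input bytes against '\\' and '"' and keeps the two
// resulting 64-bit masks. has_quote_first() then tests
// ((bs_bits - 1) & quote_bits) != 0: subtracting 1 turns bs_bits into a mask
// of every position strictly before the first backslash (all ones when there
// is no backslash at all), so the AND is non-zero exactly when a quote
// appears before any backslash.)
//
//   bs_bits     = 0b...01000   (first backslash at index 3)
//   bs_bits - 1 = 0b...00111
//   quote_bits  = 0b...00010   (quote at index 1)  ->  AND != 0, the quote comes first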
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
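// (Illustrative aside, not part of the simdjson sources: the multiply-add
// sequence below reduces eight ASCII digits pairwise. After subtracting '0',
// _mm_maddubs_epi16 with weights {10,1,...} combines adjacent bytes into
// two-digit values, _mm_madd_epi16 with {100,1,...} combines those into
// four-digit values, and a final _mm_madd_epi16 with {10000,1,...} yields the
// eight-digit number in the low 32-bit lane.)
//
//   "12345678"  ->  1 2 3 4 5 6 7 8
//               ->  12   34   56   78
//               ->  1234      5678
//               ->  1234 * 10000 + 5678 = 12345678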
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly 
speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. 
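// (Illustrative aside, not part of the simdjson sources: the atomparsing
// helpers above validate literals with a single 32-bit comparison. str4ncmp
// loads 4 bytes via memcpy -- avoiding unaligned-access trouble while still
// compiling down to one load -- and XORs them with the constant for "true",
// "alse" or "null"; a zero result means the bytes match. OR-ing in
// is_not_structural_or_whitespace() of the following byte rejects inputs
// such as "truex" while accepting "true," or "true}".)
//
//   src = "true,"  ->  str4ncmp(src, "true") == 0
//                      is_not_structural_or_whitespace(',') == 0  ->  valid atom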
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +namespace icelake { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for icelake */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
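// (Illustrative aside, not part of the simdjson sources: to_double() above
// assembles an IEEE-754 binary64 value directly from its bit fields -- bit 63
// is the sign, bits 62..52 hold the biased exponent, and bits 51..0 hold the
// fraction. The leading 1 of the significand is implicit, which is why bit 52
// of the mantissa is cleared before the exponent is OR-ed in.)
//
//   mantissa = 1ULL << 52, real_exponent = 1023, negative = false
//   -> bit pattern 0x3FF0000000000000  ->  1.0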
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
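// (Illustrative aside, not part of the simdjson sources: the "fast function"
// referred to here is the fixed-point approximation described just below,
// ((152170 + 65536) * power) >> 16, where 152170 / 65536 is approximately
// log(5)/log(2). A quick check with power = 10:
//   (152170 + 65536) * 10 = 2,177,060;  2,177,060 >> 16 = 33
//   floor(log2(5^10)) + 10 = 23 + 10 = 33  -- the two agree.)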
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+  // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
+  // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
+  // 2^{53} x 5^{-q} < 2^{64}.
+  // Hence we have 5^{-q} < 2^{11}, or q >= -4.
+  //
+  // We require lower <= 1 and not lower == 0 because we could not prove that
+  // lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
+  if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
+    if((mantissa << (upperbit + 64 - 53 - 2)) == upper) {
+      mantissa &= ~1; // flip it so that we do not round up
+    }
+  }
+
+  mantissa += mantissa & 1;
+  mantissa >>= 1;
+
+  // Here we have mantissa < (1<<53), unless there was an overflow
+  if (mantissa >= (1ULL << 53)) {
+    //////////
+    // This will happen when parsing values such as 7.2057594037927933e+16
+    ////////
+    mantissa = (1ULL << 52);
+    real_exponent++;
+  }
+  mantissa &= ~(1ULL << 52);
+  // we have to check that real_exponent is in range, otherwise we bail out
+  if (simdjson_unlikely(real_exponent > 2046)) {
+    // We have an infinite value: simdjson refuses to parse infinite values, so we report an error.
+    return false;
+  }
+  d = to_double(mantissa, real_exponent, negative);
+  return true;
+}
+
+// We call a fallback floating-point parser that might be slow. Note
+// it will accept JSON numbers, but the JSON spec. is more restrictive so
+// before you call parse_float_fallback, you need to have validated the input
+// string with the JSON grammar.
+// It will return an error (false) if the parsed number is infinite.
+// The string parsing itself always succeeds. We know that there is at least
+// one digit.
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
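+  // Editor's note (illustrative): for example, the textually valid JSON number 1e400 converts
+  // to +infinity, so the range check below returns false and the caller reports the number as
+  // invalid rather than storing an infinite value.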
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+// check quickly whether the next 8 chars are made of digits
+// at a glance, it looks better than Mula's
+// http://0x80.pl/articles/swar-digits-validate.html
+simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
+  uint64_t val;
+  // this can read up to 7 bytes beyond the buffer size, but we require
+  // SIMDJSON_PADDING of padding
+  static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
+  std::memcpy(&val, chars, 8);
+  // a branchy method might be faster:
+  // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
+  //  && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
+  //  0x3030303030303030);
+  return (((val & 0xF0F0F0F0F0F0F0F0) |
+           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
+          0x3333333333333333);
+}
+
+template<typename I>
+SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
+simdjson_inline bool parse_digit(const uint8_t c, I &i) {
+  const uint8_t digit = static_cast<uint8_t>(c - '0');
+  if (digit > 9) {
+    return false;
+  }
+  // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
+  i = 10 * i + digit; // might overflow, we will handle the overflow later
+  return true;
+}
+
+simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
+  // we continue with the fiction that we have an integer. If the
+  // floating point number is representable as x * 10^z for some integer
+  // z that fits in 53 bits, then we will be able to convert the
+  // integer back into a float in a lossless manner.
+  const uint8_t *const first_after_period = p;
+
+#ifdef SIMDJSON_SWAR_NUMBER_PARSING
+#if SIMDJSON_SWAR_NUMBER_PARSING
+  // this helps if we have lots of decimals!
+  // this turns out to be frequent enough.
+  if (is_made_of_eight_digits_fast(p)) {
+    i = i * 100000000 + parse_eight_digits_unrolled(p);
+    p += 8;
+  }
+#endif // SIMDJSON_SWAR_NUMBER_PARSING
+#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING
+  // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
+  if (parse_digit(*p, i)) { ++p; }
+  while (parse_digit(*p, i)) { p++; }
+  exponent = first_after_period - p;
+  // Decimal without digits (123.) is illegal
+  if (exponent == 0) {
+    return INVALID_NUMBER(src);
+  }
+  return SUCCESS;
+}
+
+simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
+  // Exp Sign: -123.456e[-]78
+  bool neg_exp = ('-' == *p);
+  if (neg_exp || '+' == *p) { p++; } // Skip + as well
+
+  // Exponent: -123.456e-[78]
+  auto start_exp = p;
+  int64_t exp_number = 0;
+  while (parse_digit(*p, exp_number)) { ++p; }
+  // It is possible for parse_digit to overflow.
+  // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
+  // Thus we *must* check for possible overflow before we negate exp_number.
+
+  // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
+  // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
+  // not oblige and may, in fact, generate two distinct paths in any case. It might be
+  // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
+  // instructions for a simdjson_likely branch, an inconclusive gain.
+
+  // If there were no digits, it's an error.
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
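+//
+// Editor's note (illustrative, not upstream documentation): the fast path below accumulates the
+// digits into a uint64_t and defers overflow handling to the digit-count checks that follow. A
+// minimal standalone sketch of that idea (the helper name is the editor's own) might be:
+//
+//   #include <cstdint>
+//   #include <cstddef>
+//   // Accumulate ASCII digits; wrap-around is allowed here and is detected afterwards by the
+//   // caller from the number of digits, just as parse_number does with digit_count.
+//   inline uint64_t accumulate_digits(const char *s, size_t n) {
+//     uint64_t i = 0;
+//     for (size_t k = 0; k < n; k++) { i = 10 * i + uint64_t(s[k] - '0'); }
+//     return i;
+//   }
+//
+// The wrap-around argument used below for 20-digit values can be checked numerically: the
+// largest 20-digit value starting with '1' is 19,999,999,999,999,999,999, and
+// 19,999,999,999,999,999,999 - 2^64 = 1,553,255,926,290,448,383, which is smaller than the
+// smallest 20-digit value 10,000,000,000,000,000,000, so a wrapped result is always detectable.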
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
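+// Editor's note (illustrative): the table below is simply a precomputed version of the branchy
+// check quoted in the parsers further down. A hypothetical generator (editor's own, not part of
+// simdjson) producing the same 256 entries could look like:
+//
+//   #include <array>
+//   #include <cstddef>
+//   #include <cstdint>
+//   inline bool is_json_structural_or_space(uint8_t c) {
+//     return c == ',' || c == ':' || c == '[' || c == ']' || c == '{' || c == '}' ||
+//            c == ' ' || c == '\t' || c == '\n' || c == '\r';
+//   }
+//   inline std::array<uint8_t, 256> make_finisher_table(uint8_t ok, uint8_t wrong_type, uint8_t bad) {
+//     std::array<uint8_t, 256> t{};
+//     for (int c = 0; c < 256; c++) {
+//       if (is_json_structural_or_space(uint8_t(c)))  { t[size_t(c)] = ok; }
+//       else if (c == '.' || c == 'e' || c == 'E')    { t[size_t(c)] = wrong_type; }
+//       else                                          { t[size_t(c)] = bad; }
+//     }
+//     return t;
+//   }
+//
+// For instance, make_finisher_table(SUCCESS, INCORRECT_TYPE, NUMBER_ERROR) marks ',' and '}'
+// as SUCCESS, '.' as INCORRECT_TYPE, and 'a' as NUMBER_ERROR, matching the entries below.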
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
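+  // Editor's worked example (illustrative): for the input "1.25" the code below accumulates
+  // i = 125 while the fractional digits contribute exponent = -2; compute_float_64(-2, 125, false)
+  // then reconstructs exactly 1.25, so the common case does not need the slow fallback parser.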
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
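+  // Editor's note (illustrative summary of the classification above):
+  //   "9223372036854775807"  -> signed_integer   (largest value that fits in int64_t)
+  //   "9223372036854775808"  -> unsigned_integer (2^63, the first value past INT64_MAX)
+  //   "18446744073709551615" -> unsigned_integer (20 digits)
+  //   "-1"                   -> signed_integer   (any valid negative integer)
+  //   "1.5" or "1e2"         -> floating_point_number (handled by the return below)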
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for icelake */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +/* end file simdjson/generic/amalgamated.h for icelake */ +/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_ICELAKE_H +/* end file simdjson/icelake.h */ +/* including simdjson/icelake/implementation.h: #include */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | 
internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ + +// defining SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER allows us to provide our own bit_indexer::write +#define SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +/* including simdjson/icelake/begin.h: #include */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. 
+ */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
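+  // Editor's note (illustrative): e.g. trailing_zeroes(0b1000) == 3. Combined with
+  // clear_lowest_bit (defined just below), this supports the usual "visit each set bit"
+  // loop used when bitmasks are turned back into positions:
+  //
+  //   while (bits != 0) {
+  //     int pos = trailing_zeroes(bits);  // index of the lowest set bit
+  //     /* ... use pos ... */
+  //     bits = clear_lowest_bit(bits);    // clear that bit and continue
+  //   }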
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
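+  // Editor note (not upstream text): `base` follows the curiously recurring template pattern;
+  // the derived type is passed in as `Child` so that the bitwise operators below can return
+  // the strongly typed wrapper (e.g. simd8 of uint8_t) rather than the untyped base.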
+ template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, 
v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, 
v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
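+// Editor note (usage sketch, not upstream text): stage 2's string parser consumes this
+// roughly as follows:
+//   auto bs_quote = backslash_and_quote::copy_and_find(src, dst);
+//   if (bs_quote.has_quote_first())    { /* string ends at dst + bs_quote.quote_index() */ }
+//   else if (bs_quote.has_backslash()) { /* decode the escape at bs_quote.backslash_index() */ }
+//   else { src += backslash_and_quote::BYTES_PROCESSED; dst += backslash_and_quote::BYTES_PROCESSED; }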
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 64;
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
+  simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; }
+  simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); }
+  simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); }
+
+  uint64_t bs_bits;
+  uint64_t quote_bits;
+}; // struct backslash_and_quote
+
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // this can read up to 15 bytes beyond the buffer size, but we require
+  // SIMDJSON_PADDING of padding
+  static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");
+  simd8<uint8_t> v(src);
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  v.store(dst);
+  return {
+      static_cast<uint64_t>(v == '\\'), // bs_bits
+      static_cast<uint64_t>(v == '"'),  // quote_bits
+  };
+}
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H
+/* end file simdjson/icelake/stringparsing_defs.h */
+/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */
+/* begin file simdjson/icelake/numberparsing_defs.h */
+#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace numberparsing {
+
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  // this actually computes *16* values so we are being wasteful.
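+  // Editor note (worked example, not upstream text): for chars == "12345678" the three
+  // multiply-add steps below collapse neighbouring digits pairwise:
+  //   after _mm_maddubs_epi16 (x10):    12 | 34 | 56 | 78   (16-bit lanes)
+  //   after _mm_madd_epi16    (x100):   1234 | 5678         (32-bit lanes)
+  //   after _mm_madd_epi16    (x10000): 12345678            (returned low lane)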
+  const __m128i ascii0 = _mm_set1_epi8('0');
+  const __m128i mul_1_10 =
+      _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1);
+  const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
+  const __m128i mul_1_10000 =
+      _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
+  const __m128i input = _mm_sub_epi8(
+      _mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
+  const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
+  const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
+  const __m128i t3 = _mm_packus_epi32(t2, t2);
+  const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000);
+  return _mm_cvtsi128_si32(
+      t4); // only captures the sum of the first 8 digits, drop the rest
+}
+
+/** @private */
+simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  internal::value128 answer;
+#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+#ifdef _M_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // _M_ARM64
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace numberparsing
+} // namespace icelake
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+/* end file simdjson/icelake/numberparsing_defs.h */
+/* end file simdjson/icelake/begin.h */
+/* including generic/amalgamated.h for icelake: #include <generic/amalgamated.h> */
+/* begin file generic/amalgamated.h for icelake */
+#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H)
+#error generic/dependencies.h must be included before generic/amalgamated.h!
+#endif + +/* including generic/base.h for icelake: #include */ +/* begin file generic/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for icelake */ +/* including generic/dom_parser_implementation.h for icelake: #include */ +/* begin file generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace icelake { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for icelake */ +/* including generic/json_character_block.h for icelake: #include */ +/* begin file generic/json_character_block.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for icelake */ +/* end file generic/amalgamated.h for icelake */ +/* including generic/stage1/amalgamated.h for icelake: #include */ +/* begin file generic/stage1/amalgamated.h for icelake */ +// Stuff other things depend on +/* including generic/stage1/base.h for icelake: #include */ +/* begin file generic/stage1/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for icelake */ +/* including generic/stage1/buf_block_reader.h for icelake: #include */ +/* begin file generic/stage1/buf_block_reader.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 
'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for icelake */ +/* including generic/stage1/json_escape_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_escape_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. 
block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for icelake */ +/* including generic/stage1/json_string_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_string_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only 
works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. 
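+  // (Editor note, worked example, not upstream text: for the six input bytes x"ab"x, with
+  // bit 0 = first byte and masks written least-significant bit on the right,
+  // quote == 00010010 and prefix_xor(quote) == 00001110: the opening quote and the two
+  // string bytes are on, the closing quote is off. With prev_in_string == 0 that is
+  // exactly in_string.)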
+ return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for icelake */ +/* including generic/stage1/utf8_lookup4_algorithm.h for icelake: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
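+    // (Editor note, illustrative and not upstream text: a byte pair is reported only when the
+    // same error bit survives all three lookups. For the overlong two-byte sequence 0xC0 0x80,
+    // byte_1_high (high nibble 0xC) and byte_1_low (low nibble 0x0) both contain OVERLONG_2,
+    // and byte_2_high (high nibble 0x8) contains OVERLONG_2 as well, so the final AND is
+    // non-zero and the pair is flagged.)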
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
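+    // (Editor note: a multi-byte character can straddle the 64-byte block boundary, which is
+    // why the previous block is passed in as well; prev<1>() below, and prev<2>()/prev<3>() in
+    // check_multibyte_lengths, pull the last bytes of prev_input back in so lead bytes near
+    // the boundary are still classified against their continuation bytes.)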
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for icelake */ +/* including generic/stage1/json_scanner.h for icelake: #include */ +/* begin file generic/stage1/json_scanner.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). 
+ * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included. 
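+   * (Editor note, illustrative: in `123,` the comma follows the scalar '3' and is marked,
+   * while in `"abc"true` the 't' follows a quote rather than a non-quote scalar, so it is
+   * not marked and can still be recognised as a potential scalar start.)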
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for icelake */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for icelake: #include */ +/* begin file generic/stage1/find_next_document_index.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
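+  // Illustrative example (editor's note, not part of simdjson): for the batch
+  //   [1,2]
+  // the structural characters are [ 1 , 2 ] and the backward scan above never
+  // finds a document boundary, so we fall through to here with arr_cnt == -1 and
+  // obj_cnt == 0; the '[' at index 0 (handled by the switch below) brings the
+  // counts back to zero and the whole batch is reported as one complete document.
+  // For a truncated batch such as
+  //   {"a":1} {"b":2
+  // the scan instead stops at the '{' that opens the second document (the counts
+  // are not balanced there), so the batch is cut back to the first complete document.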
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for icelake */ +/* including generic/stage1/json_minifier.h for icelake: #include */ +/* begin file generic/stage1/json_minifier.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to 
write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for icelake */ +/* including generic/stage1/json_structural_indexer.h for icelake: #include */ +/* begin file generic/stage1/json_structural_indexer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. 
However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. 
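+    // Illustrative example (editor's note): trim_partial_utf8 above only drops bytes
+    // that could be the start of a code point cut off by the window. For instance,
+    // given the 4 bytes { 'a', 'b', 0xE2, 0x82 } (a truncated 3-byte sequence for
+    // U+20AC), buf[len-2] == 0xE2 >= 0xE0, so the length is reduced to 2 and only
+    // "ab" is indexed in this call. If even that trimming leaves nothing, the window
+    // consisted entirely of partial bytes, which the check below reports as UTF8_ERROR.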
+ if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. 
If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. 
+ return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for icelake */ +/* including generic/stage1/utf8_validator.h for icelake: #include */ +/* begin file generic/stage1/utf8_validator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for icelake */ +/* end file generic/stage1/amalgamated.h for icelake */ +/* including generic/stage2/amalgamated.h for icelake: #include */ +/* begin file generic/stage2/amalgamated.h for icelake */ +// Stuff other things depend on +/* including generic/stage2/base.h for icelake: #include */ +/* begin file generic/stage2/base.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for icelake */ +/* including generic/stage2/tape_writer.h for icelake: #include */ +/* begin file generic/stage2/tape_writer.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): 
#endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for icelake */ +/* including generic/stage2/logger.h for icelake: #include */ +/* begin file generic/stage2/logger.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace icelake { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. 
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for icelake */ +/* including generic/stage2/stringparsing.h for icelake: #include */ +/* begin file generic/stage2/stringparsing.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
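+  // Worked example (editor's note): for the escaped pair \ud83d\ude00, the first
+  // hex_to_u32_nocheck call yields 0xD83D (a high surrogate) and the second yields
+  // 0xDE00, so the combination computed below is
+  //   ((0xD83D - 0xD800) << 10 | (0xDE00 - 0xDC00)) + 0x10000 == 0x1F600
+  // i.e. U+1F600, which codepoint_to_utf8 then writes out as four UTF-8 bytes.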
+ if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. 
+ */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for icelake */ +/* including generic/stage2/structural_iterator.h for icelake: #include */ +/* begin file generic/stage2/structural_iterator.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for icelake */ +/* including generic/stage2/tape_builder.h for icelake: #include */ +/* begin file generic/stage2/tape_builder.h for icelake */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace icelake { 
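+// Editor's note (illustrative sketch, not part of simdjson): tape_builder below is
+// the concrete visitor that json_iterator::walk_document drives. Because
+// walk_document is a template over its visitor, any type exposing the same
+// callbacks could be substituted, e.g. a hypothetical value counter:
+//
+//   struct value_counter {
+//     size_t count = 0;
+//     simdjson_inline error_code visit_primitive(json_iterator &, const uint8_t *) noexcept {
+//       count++; return SUCCESS;
+//     }
+//     // ... remaining visit_* and increment_count callbacks simply return SUCCESS ...
+//   };
+//
+// (Hypothetical example only; the actual visitor contract is the set of methods
+// documented on walk_document and implemented by tape_builder.)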
+namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. 
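+ // parse_string (defined in generic/stage2/stringparsing.h above) unescapes the value into the
+ // string buffer and returns a pointer one past the last unescaped byte it wrote, or nullptr when
+ // it meets an invalid escape sequence or a malformed \u escape.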
+ if (dst == nullptr) {
+ iter.log_error("Invalid escape in string");
+ return STRING_ERROR;
+ }
+ on_end_string(dst);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept {
+ return visit_string(iter, value);
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("number");
+ return numberparsing::parse_number(value, tape);
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept {
+ //
+ // We need to make a copy to make sure that the string is space terminated.
+ // This is not about padding the input, which should already be padded up
+ // to len + SIMDJSON_PADDING. However, we have no control at this stage
+ // on how the padding was done. What if the input string was padded with nulls?
+ // It is quite common for an input string to have an extra null character (C string).
+ // We do not want to allow 9\0 (where \0 is the null character) inside a JSON
+ // document, but the string "9\0" by itself is fine. So we make a copy and
+ // pad the input with spaces when we know that there is just one input element.
+ // This copy is relatively expensive, but it will almost never be called in
+ // practice unless you are in the strange scenario where you have many JSON
+ // documents made of single atoms.
+ //
+ std::unique_ptr<uint8_t[]> copy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]);
+ if (copy.get() == nullptr) { return MEMALLOC; }
+ std::memcpy(copy.get(), value, iter.remaining_len());
+ std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING);
+ error_code error = visit_number(iter, copy.get());
+ return error;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("true");
+ if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; }
+ tape.append(0, internal::tape_type::TRUE_VALUE);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("true");
+ if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; }
+ tape.append(0, internal::tape_type::TRUE_VALUE);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("false");
+ if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; }
+ tape.append(0, internal::tape_type::FALSE_VALUE);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("false");
+ if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; }
+ tape.append(0, internal::tape_type::FALSE_VALUE);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept {
+ iter.log_value("null");
+ if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; }
+ tape.append(0, internal::tape_type::NULL_VALUE);
+ return SUCCESS;
+}
+
+simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t
*value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
 It comes at a small cost
+ *dst = 0;
+ current_string_buf_loc = dst + 1;
+}
+
+} // namespace stage2
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H
+/* end file generic/stage2/tape_builder.h for icelake */
+/* end file generic/stage2/amalgamated.h for icelake */
+
+#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER
+
+//
+// Stage 1
+//
+
+namespace simdjson {
+namespace icelake {
+
+simdjson_warn_unused error_code implementation::create_dom_parser_implementation(
+ size_t capacity,
+ size_t max_depth,
+ std::unique_ptr<internal::dom_parser_implementation>& dst
+) const noexcept {
+ dst.reset( new (std::nothrow) dom_parser_implementation() );
+ if (!dst) { return MEMALLOC; }
+ if (auto err = dst->set_capacity(capacity))
+ return err;
+ if (auto err = dst->set_max_depth(max_depth))
+ return err;
+ return SUCCESS;
+}
+
+namespace {
+
+using namespace simd;
+
+// This identifies structural characters (comma, colon, braces, brackets),
+// and ASCII white-space ('\r','\n','\t',' ').
+simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64<uint8_t>& in) {
+ // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why
+ // we can't use the generic lookup_16.
+ const auto whitespace_table = simd8<uint8_t>::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100);
+
+ // The 6 operators (:,[]{}) have these values:
+ //
+ // , 2C
+ // : 3A
+ // [ 5B
+ // { 7B
+ // ] 5D
+ // } 7D
+ //
+ // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character are unique.
+ // We exploit this, using a simd 4-bit lookup to tell us which character to match against, and then
+ // match it (against | 0x20).
+ //
+ // To prevent recognizing other characters, everything else gets compared with 0, which cannot
+ // match due to the | 0x20.
+ //
+ // NOTE: Due to the | 0x20, this ALSO treats <FF> and <SUB> (control characters 0C and 1A) like ,
+ // and :. This gets caught in stage 2, which checks the actual character to ensure the right
+ // operators are in the right places.
+ const auto op_table = simd8<uint8_t>::repeat_16(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B
+ ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D
+ );
+
+ // We compute whitespace and op separately. If later code only uses one or the
+ // other, given the fact that all functions are aggressively inlined, we can
+ // hope that useless computations will be omitted. This is namely the case when
+ // minifying (we only need whitespace).
+
+ const uint64_t whitespace = in.eq({
+ _mm512_shuffle_epi8(whitespace_table, in.chunks[0])
+ });
+ // Turn [ and ] into { and }
+ const simd8x64<uint8_t> curlified{
+ in.chunks[0] | 0x20
+ };
+ const uint64_t op = curlified.eq({
+ _mm512_shuffle_epi8(op_table, in.chunks[0])
+ });
+
+ return { whitespace, op };
+}
+
+simdjson_inline bool is_ascii(const simd8x64<uint8_t>& input) {
+ return input.reduce_or().is_ascii();
+}
+
+simdjson_unused simdjson_inline simd8<bool> must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
+ simd8<uint8_t> is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0
+ simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
+ simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
+ // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
+ return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0);
+}
+
+simdjson_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
+ simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0
+ simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0
+ // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
+ return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
+}
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+/**
+ * We provide a custom version of bit_indexer::write using
+ * naked intrinsics.
+ * TODO: make this code more elegant.
+ */
+// Under GCC 12, the intrinsic _mm512_extracti32x4_epi32 may generate 'maybe uninitialized'.
+// As a workaround, we disable warnings within the following function.
+SIMDJSON_PUSH_DISABLE_ALL_WARNINGS
+namespace simdjson { namespace icelake { namespace { namespace stage1 {
+simdjson_inline void bit_indexer::write(uint32_t idx, uint64_t bits) {
+ // In some instances, the next branch is expensive because it is mispredicted.
+ // Unfortunately, in other cases,
+ // it helps tremendously.
+ if (bits == 0) { return; }
+
+ const __m512i indexes = _mm512_maskz_compress_epi8(bits, _mm512_set_epi32(
+ 0x3f3e3d3c, 0x3b3a3938, 0x37363534, 0x33323130,
+ 0x2f2e2d2c, 0x2b2a2928, 0x27262524, 0x23222120,
+ 0x1f1e1d1c, 0x1b1a1918, 0x17161514, 0x13121110,
+ 0x0f0e0d0c, 0x0b0a0908, 0x07060504, 0x03020100
+ ));
+ const __m512i start_index = _mm512_set1_epi32(idx);
+
+ const auto count = count_ones(bits);
+ __m512i t0 = _mm512_cvtepu8_epi32(_mm512_castsi512_si128(indexes));
+ _mm512_storeu_si512(this->tail, _mm512_add_epi32(t0, start_index));
+
+ if(count > 16) {
+ const __m512i t1 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 1));
+ _mm512_storeu_si512(this->tail + 16, _mm512_add_epi32(t1, start_index));
+ if(count > 32) {
+ const __m512i t2 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 2));
+ _mm512_storeu_si512(this->tail + 32, _mm512_add_epi32(t2, start_index));
+ if(count > 48) {
+ const __m512i t3 = _mm512_cvtepu8_epi32(_mm512_extracti32x4_epi32(indexes, 3));
+ _mm512_storeu_si512(this->tail + 48, _mm512_add_epi32(t3, start_index));
+ }
+ }
+ }
+ this->tail += count;
+}
+}}}}
+SIMDJSON_POP_DISABLE_WARNINGS
+
+//
+// Stage 2
+//
+
+//
+// Implementation-specific overrides
+//
+namespace simdjson {
+namespace icelake {
+
+simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept {
+ return icelake::stage1::json_minifier::minify<128>(buf, len, dst, dst_len);
+}
+
+simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept {
+ this->buf = _buf;
+ this->len = _len;
+ return icelake::stage1::json_structural_indexer::index<128>(_buf, _len, *this, streaming);
+}
+
+simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
+ return icelake::stage1::generic_validate_utf8(buf,len);
+}
+
+simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept {
+ return stage2::tape_builder::parse_document<false>(*this, _doc);
+}
+
+simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
+ return stage2::tape_builder::parse_document<true>(*this, _doc);
+}
+
+simdjson_warn_unused uint8_t
*dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return icelake::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return icelake::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace icelake +} // namespace simdjson + +/* including simdjson/icelake/end.h: #include */ +/* begin file simdjson/icelake/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/icelake/end.h */ + +#endif // SIMDJSON_SRC_ICELAKE_CPP +/* end file icelake.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_PPC64 +/* including ppc64.cpp: #include */ +/* begin file ppc64.cpp */ +#ifndef SIMDJSON_SRC_PPC64_CPP +#define SIMDJSON_SRC_PPC64_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/ppc64.h: #include */ +/* begin file simdjson/ppc64.h */ +#ifndef SIMDJSON_PPC64_H +#define SIMDJSON_PPC64_H + +/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. 
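+// Depending on the dialect, altivec.h may expose the AltiVec keywords `vector` and `bool` as plain
+// macros, which would clash with the C++ `bool` type and with identifiers such as std::vector.
+// The #undefs below drop those macros; the code in this kernel only relies on the always-available
+// `__vector ...` spellings.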
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+ // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace 
ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, 
chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for ppc64 */ +/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static 
inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for ppc64 */ +/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for ppc64 */ +/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
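/*
 * Editor's aside (illustrative, not part of simdjson): the atom helpers in
 * atomparsing.h above compare four bytes at once. str4ncmp() memcpy's four
 * input bytes into a uint32_t (safe for unaligned input) and XORs them against
 * the literal, so a zero result means "identical"; OR-ing in the "is the next
 * character structural or whitespace?" lookup rejects inputs such as "trueX"
 * in the same branch-free expression. A stripped-down sketch, with a
 * hypothetical name:
 */
#if 0 // editor-only sketch, excluded from the build
static bool example_is_true_atom(const uint8_t *src) {
  // Requires at least 5 readable bytes after src (guaranteed by SIMDJSON_PADDING).
  uint32_t expected, actual;
  std::memcpy(&expected, "true", sizeof(expected)); // bytes of the literal
  std::memcpy(&actual, src, sizeof(actual));        // four input bytes
  return ((expected ^ actual) |
          jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0;
}
#endif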
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
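/*
 * Editor's aside (illustrative, not part of simdjson): the fast path above
 * relies on the fact that both the digit value and the power of ten are exact
 * doubles, so a single IEEE round-to-nearest multiply or divide is already
 * correctly rounded. For example, "23.45" yields i = 2345 with power = -2,
 * and one exact division gives the closest double to 23.45:
 */
#if 0 // editor-only sketch, excluded from the build
static_assert(2345.0 / 100.0 == 23.45,
              "one exact IEEE division yields the correctly rounded result");
#endif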
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
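/*
 * Editor's aside (illustrative, not part of simdjson): the integer expression
 * (((152170 + 65536) * power) >> 16) used above is a fixed-point approximation
 * of power + floor(log2(5^power)), since 152170/65536 is close to log2(5).
 * Two spot checks for small non-negative powers (negative powers would
 * right-shift a negative value, so they are left out of this sketch):
 */
#if 0 // editor-only sketch, excluded from the build
// power = 3:  floor(log2(125))  = 6,  and 6 + 3   = 9
static_assert((((152170 + 65536) * 3) >> 16) == 9, "approximation holds for power = 3");
// power = 22: floor(log2(5^22)) = 51, and 51 + 22 = 73
static_assert((((152170 + 65536) * 22) >> 16) == 73, "approximation holds for power = 22");
#endif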
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
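/*
 * Editor's aside (illustrative, not part of simdjson): because the parser peeks
 * at the byte following the number, a document that consists of a bare number
 * must be copied into a padded buffer with a trailing space first, roughly
 * like this (hypothetical helper; std::string used only for brevity):
 */
#if 0 // editor-only sketch, excluded from the build
static std::string example_pad_bare_number(const char *text, size_t len) {
  std::string padded(text, len);
  padded.push_back(' ');                   // terminator the parser may read
  padded.append(SIMDJSON_PADDING, '\0');   // padding the SWAR helpers may read
  return padded;                           // parse from padded.data()
}
#endif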
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
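/*
 * Editor's aside (illustrative, not part of simdjson): the overflow argument
 * used in the 20-digit checks above can be verified directly. The largest
 * 20-digit value starting with '1' is 19,999,999,999,999,999,999; accumulating
 * it with i = 10*i + digit wraps modulo 2^64 to a value below
 * 1,553,255,926,290,448,384, which is why a wrapped result is detectably
 * smaller than any genuine 20-digit input:
 */
#if 0 // editor-only sketch, excluded from the build
static_assert(10 * 1999999999999999999ULL + 9 == 1553255926290448383ULL,
              "19,999,999,999,999,999,999 wraps modulo 2^64 to a small value");
#endif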
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
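+  // Classification examples for this function (illustrative only):
+  //   "-42"                  -> signed_integer   (any valid negative integer is signed)
+  //   "9223372036854775807"  -> signed_integer   (19 digits, below the 9223372036854775808 cutoff)
+  //   "9223372036854775808"  -> unsigned_integer (19 digits, at or above the cutoff)
+  //   "12345678901234567890" -> unsigned_integer (20 digits)
+  //   "1.5" or "1e3"         -> floating_point_number (falls through to the return below)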
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
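+    // Example (illustrative only): for "1.2345678901234567890" the span from the first
+    // digit to p covers 21 characters; subtracting one for the '.' leaves 20 significand
+    // digits, so overflow is set and the slow fallback below may be used instead of
+    // compute_float_64.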
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +/* including simdjson/ppc64/implementation.h: #include */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ + +/* including simdjson/ppc64/begin.h: #include */ +/* begin file simdjson/ppc64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */ +#define SIMDJSON_IMPLEMENTATION ppc64 +/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */ +/* begin file simdjson/ppc64/base.h */ +#ifndef SIMDJSON_PPC64_BASE_H +#define SIMDJSON_PPC64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for ALTIVEC (PPC64). + */ +namespace ppc64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BASE_H +/* end file simdjson/ppc64/base.h */ +/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */ +/* begin file simdjson/ppc64/intrinsics.h */ +#ifndef SIMDJSON_PPC64_INTRINSICS_H +#define SIMDJSON_PPC64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +// These are defined by altivec.h in GCC toolchain, it is safe to undef them. +#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+ // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace 
ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, 
chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
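+// Worked example (illustrative only): if the 32-byte window starts with the bytes
+//   a " b \ "      (positions 0 through 4)
+// then quote_bits has bits 1 and 4 set and bs_bits has bit 3 set, so
+// has_quote_first() is true (the quote at bit 1 precedes the first backslash at bit 3),
+// quote_index() == 1 and backslash_index() == 3.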
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including generic/amalgamated.h for ppc64: #include */ +/* begin file generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! 
+#endif + +/* including generic/base.h for ppc64: #include */ +/* begin file generic/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for ppc64 */ +/* including generic/dom_parser_implementation.h for ppc64: #include */ +/* begin file generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace ppc64 { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for ppc64 */ +/* including generic/json_character_block.h for ppc64: #include */ +/* begin file generic/json_character_block.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for ppc64 */ +/* end file generic/amalgamated.h for ppc64 */ +/* including generic/stage1/amalgamated.h for ppc64: #include */ +/* begin file generic/stage1/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage1/base.h for ppc64: #include */ +/* begin file generic/stage1/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { 
+namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for ppc64 */ +/* including generic/stage1/buf_block_reader.h for ppc64: #include */ +/* begin file generic/stage1/buf_block_reader.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 
0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for ppc64 */ +/* including generic/stage1/json_escape_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_escape_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. \n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. 
block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. 
+ // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for ppc64 */ +/* including generic/stage1/json_string_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_string_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on 
non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. 
+ return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for ppc64 */ +/* including generic/stage1/utf8_lookup4_algorithm.h for ppc64: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
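+    //
+    // Editor's note (illustrative only, not part of the upstream simdjson sources): the three
+    // nibble lookups (byte_1_high above, byte_1_low and byte_2_high below) are ANDed together,
+    // so an error bit survives only when all three tables agree. Two small worked cases, using
+    // the bit constants defined above:
+    //
+    //   0xC0 0x80 (overlong 2-byte encoding of NUL):
+    //     byte_1_high[0xC] = TOO_SHORT | OVERLONG_2                                   = 0x21
+    //     byte_1_low [0x0] = CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4              = 0xE7
+    //     byte_2_high[0x8] = TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3
+    //                        | TOO_LARGE_1000 | OVERLONG_4                             = 0xE6
+    //     0x21 & 0xE7 & 0xE6 = 0x20 = OVERLONG_2  -> error reported
+    //
+    //   0xC3 0xA9 (valid 2-byte sequence, U+00E9):
+    //     byte_1_high[0xC] = 0x21, byte_1_low[0x3] = CARRY = 0x83, byte_2_high[0xA] = 0xBA
+    //     0x21 & 0x83 & 0xBA = 0                   -> no error
+    //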
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for ppc64 */ +/* including generic/stage1/json_scanner.h for ppc64: #include */ +/* begin file generic/stage1/json_scanner.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). 
* Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural character. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that follows a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It may reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. + * The characters following a quote are not included.
+ */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for ppc64 */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for ppc64: #include */ +/* begin file generic/stage1/find_next_document_index.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
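+  //
+  // Editor's note (illustrative only, not part of the upstream simdjson sources): as a concrete
+  // example of the backward scan above, suppose a batch ends with the truncated text
+  // `[1,2] {"a":3` and the structural indexes point at `[` `1` `,` `2` `]` `{` `"a"` `:` `3`
+  // (nine entries). Scanning backwards, `3` is skipped because its predecessor is `:`, the `:`
+  // itself is skipped outright, and `"a"` is skipped because its predecessor is `{`. Then `{`
+  // bumps obj_cnt to 1 and its predecessor `]` is not one of `{ [ : ,`, so a document boundary
+  // is found; since obj_cnt != 0 the last document is incomplete and the loop returns i == 5,
+  // truncating the batch after the five structurals of `[1,2]`. Had the batch ended with the
+  // closing `}`, the counts would balance and the function would instead return
+  // n_structural_indexes, signalling that the final document is complete.
+  //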
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for ppc64 */ +/* including generic/stage1/json_minifier.h for ppc64: #include */ +/* begin file generic/stage1/json_minifier.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to write directly 
to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for ppc64 */ +/* including generic/stage1/json_structural_indexer.h for ppc64: #include */ +/* begin file generic/stage1/json_structural_indexer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. However, + * it tends to minimize the total number of instructions which is + * beneficial. 
+ */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. 
+ if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. 
If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. 
+ return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for ppc64 */ +/* including generic/stage1/utf8_validator.h for ppc64: #include */ +/* begin file generic/stage1/utf8_validator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64 in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64 in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8(reinterpret_cast(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for ppc64 */ +/* end file generic/stage1/amalgamated.h for ppc64 */ +/* including generic/stage2/amalgamated.h for ppc64: #include */ +/* begin file generic/stage2/amalgamated.h for ppc64 */ +// Stuff other things depend on +/* including generic/stage2/base.h for ppc64: #include */ +/* begin file generic/stage2/base.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for ppc64 */ +/* including generic/stage2/tape_writer.h for ppc64: #include */ +/* begin file generic/stage2/tape_writer.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // 
SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for ppc64 */ +/* including generic/stage2/logger.h for ppc64: #include */ +/* begin file generic/stage2/logger.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace ppc64 { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. 
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for ppc64 */ +/* including generic/stage2/stringparsing.h for ppc64: #include */ +/* begin file generic/stage2/stringparsing.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
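+  // Worked example: the escaped pair "\uD83D\uDE00" takes the surrogate branch
+  // below: high = 0xD83D, low = 0xDE00, low_bit = 0xDE00 - 0xDC00 = 0x200, so
+  // code_point = ((0xD83D - 0xD800) << 10 | 0x200) + 0x10000 = 0x1F600, which is
+  // then written out as the standard 4-byte UTF-8 sequence F0 9F 98 80.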
+ if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. 
+ */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for ppc64 */ +/* including generic/stage2/structural_iterator.h for ppc64: #include */ +/* begin file generic/stage2/structural_iterator.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for ppc64 */ +/* including generic/stage2/tape_builder.h for ppc64: #include */ +/* begin file generic/stage2/tape_builder.h for ppc64 */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace ppc64 { +namespace { +namespace 
stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. 
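+  // parse_string reports failure by returning nullptr: for example an unknown
+  // escape such as "\q" (escape_map['q'] == 0), or a lone low surrogate such as
+  // "\uDC00", which is rejected here because replacement is not allowed.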
+ if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t 
*value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for ppc64 */ +/* end file generic/stage2/amalgamated.h for ppc64 */ + +// +// Stage 1 +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + const simd8 table1(16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0); + const simd8 table2(8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0); + + simd8x64 v( + (in.chunks[0] & 0xf).lookup_16(table1) & (in.chunks[0].shr<4>()).lookup_16(table2), + (in.chunks[1] & 0xf).lookup_16(table1) & (in.chunks[1].shr<4>()).lookup_16(table2), + (in.chunks[2] & 0xf).lookup_16(table1) & (in.chunks[2].shr<4>()).lookup_16(table2), + (in.chunks[3] & 0xf).lookup_16(table1) & (in.chunks[3].shr<4>()).lookup_16(table2) + ); + + uint64_t op = simd8x64( + v.chunks[0].any_bits_set(0x7), + v.chunks[1].any_bits_set(0x7), + v.chunks[2].any_bits_set(0x7), + v.chunks[3].any_bits_set(0x7) + ).to_bitmask(); + + uint64_t whitespace = simd8x64( + v.chunks[0].any_bits_set(0x18), + v.chunks[1].any_bits_set(0x18), + v.chunks[2].any_bits_set(0x18), + v.chunks[3].any_bits_set(0x18) + ).to_bitmask(); + + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + // careful: 0x80 is not ascii. + return input.reduce_or().saturating_sub(0x7fu).bits_not_set_anywhere(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. 
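+  // Illustrative values: a 3-byte lead such as 0xE2 two positions back yields
+  // 0xE2 - 0xDF = 3 (> 0), a 4-byte lead 0xF0 three positions back yields
+  // 0xF0 - 0xEF = 1, while an ASCII or continuation byte saturates to 0.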
+ return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// +namespace simdjson { +namespace ppc64 { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return ppc64::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return ppc64::stage1::json_structural_indexer::index<64>(buf, len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return ppc64::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return ppc64::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return ppc64::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace ppc64 +} // namespace simdjson + +/* including simdjson/ppc64/end.h: #include */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_SRC_PPC64_CPP +/* end file ppc64.cpp */ +#endif +#if SIMDJSON_IMPLEMENTATION_WESTMERE +/* including westmere.cpp: #include */ +/* begin file westmere.cpp */ +#ifndef SIMDJSON_SRC_WESTMERE_CPP +#define SIMDJSON_SRC_WESTMERE_CPP + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* including simdjson/westmere.h: #include */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* 
amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
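+  // Example: for input_num = 0b101000 the lowest set bit is bit 3, so both this
+  // _BitScanForward64 path and the __builtin_ctzll path below return 3.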
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. 
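+  // Carry-less multiplication by an all-ones operand makes output bit i the XOR of
+  // input bits 0..i, which is exactly the cumulative xor described above. A scalar
+  // sketch, for illustration only (not used by the implementation):
+  //   uint64_t acc = 0, result = 0;
+  //   for (int i = 0; i < 64; i++) { acc ^= (bitmask >> i) & 1; result |= acc << i; }
+  //   return result;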
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
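+  // Worked example: for chars = "12345678" the three multiply-add stages combine
+  // adjacent lanes as (1,2)->12, (3,4)->34, (5,6)->56, (7,8)->78, then
+  // 12*100 + 34 = 1234 and 56*100 + 78 = 5678, and finally
+  // 1234*10000 + 5678 = 12345678, which _mm_cvtsi128_si32 returns below.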
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ 
other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
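+  // Example: with mask = 0x0005 (bits 0 and 2 set) the bytes at positions 0 and 2
+  // are dropped, so positions 1, 3, 4, ..., 15 are written out contiguously; only
+  // the first 16 - count_ones(mask) = 14 output bytes are significant.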
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, 
*this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { 
return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return 
*static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + 
simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
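+/* Editor's note (illustrative only, not part of the simdjson sources): the struct that
+   follows reports backslash and quote positions as bitmasks. Its has_quote_first() test
+   relies on the fact that (bs_bits - 1) sets every bit strictly below the lowest set bit
+   of bs_bits (and all bits when bs_bits is 0), so ANDing with quote_bits tells whether a
+   quote appears before the first backslash. A standalone model, with a made-up name: */
+#include <cstdint>
+static inline bool example_quote_before_first_backslash(uint32_t bs_bits, uint32_t quote_bits) {
+  // For src = `ab"c\d`: quote_bits = 0b00100 and bs_bits = 0b10000, so
+  // (bs_bits - 1) = 0b01111 covers every byte before the first backslash and the AND is
+  // nonzero -- the quote is known to come before any backslash.
+  return ((bs_bits - 1) & quote_bits) != 0;
+}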
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // 
strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
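+/* Editor's aside (illustrative only, not part of the simdjson sources): a standalone model
+   of the memcpy idiom used by string_to_uint32() and str4ncmp() just below. On a
+   little-endian target the four bytes "fals" assemble to 0x736c6166 == 1936482662. */
+#include <cstdint>
+#include <cstring>
+static inline uint32_t example_first4_as_u32(const char *s) {
+  uint32_t v;
+  std::memcpy(&v, s, sizeof v); // no pointer cast, so unaligned input is well defined
+  return v;                     // optimizers fold this into a single 32-bit load
+}
+// example_first4_as_u32(src) ^ example_first4_as_u32("true") is zero exactly when the first
+// four bytes of src are 't','r','u','e' -- the same test str4ncmp() performs below.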
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. 
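+/* Editor's sketch (speculative illustration, not the actual stage-2 code): one plausible way
+   a depth-indexed stack of such 8-byte records can be driven while scanning nested scopes.
+   The real bookkeeping lives in the stage 2 implementation later in this header; all names
+   here are made up. */
+#include <cstdint>
+struct example_open_scope { uint32_t tape_index; uint32_t count; }; // mirrors open_container below
+struct example_scope_stack {
+  example_open_scope frames[32]; // the parser sizes this with max_depth rather than a fixed 32
+  uint32_t depth = 0;
+  void open(uint32_t tape_index) { frames[depth++] = {tape_index, 0}; } // on '{' or '['
+  void element()                 { frames[depth - 1].count++; }         // one more value in scope
+  example_open_scope close()     { return frames[--depth]; }            // on '}' or ']'
+};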
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
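+/* Editor's check (illustrative only, not from the simdjson sources): two spot checks of the
+   fast exponent approximation described in the next comment, using the fact that
+   floor(log2(5^22)) = 51 and floor(log2(5^308)) = 715. */
+#include <cstdint>
+constexpr int64_t example_exp_approx(int64_t power) { return ((152170 + 65536) * power) >> 16; }
+static_assert(example_exp_approx(22) == 51 + 22, "matches floor(log2(5^22)) + 22");
+static_assert(example_exp_approx(308) == 715 + 308, "matches floor(log2(5^308)) + 308");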
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
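+  // (Editorial note, not part of the upstream source.) Why this comparison catches
+  // infinities: an out-of-range magnitude typically parses to +/-infinity
+  // (strtod-style behaviour), and +infinity compares greater than
+  // (std::numeric_limits<double>::max)() while -infinity compares less than lowest().
+  // Every finite value fails both comparisons, so the negation accepts it.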
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
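+  // (Editor's aside, not part of the upstream source.) Concrete illustration of the
+  // wrap-around warned about a few lines up: accumulating the 19 digits of
+  // "9999999999999999999" with exp_number = 10*exp_number + digit produces the
+  // 64-bit pattern of 9999999999999999999, which read as an int64_t is
+  // -8446744073709551617, i.e. already negative. That is why the digit-count
+  // check below clamps long exponents instead of trusting exp_number itself.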
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
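+// (Editor's aside, not part of the upstream source.) A minimal caller-side sketch of
+// the contract described above, for the case where the number is *not* followed by a
+// structural character or whitespace (e.g. a document that is just "3.25"). The names
+// `text` and `padded` are hypothetical; SIMDJSON_PADDING is the library's padding
+// constant used elsewhere in this header.
+//
+//   std::string text = "3.25";
+//   std::vector<uint8_t> padded(text.size() + simdjson::SIMDJSON_PADDING, ' ');
+//   std::memcpy(padded.data(), text.data(), text.size());
+//   // padded.data() now points at "3.25 " followed by more spaces, so the
+//   // "followed by whitespace" precondition of parse_number() holds.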
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
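+// (Editor's aside, not part of the upstream source.) Concrete examples of what the
+// table below encodes, using ASCII codes as indices: after the digits of "123," the
+// cursor rests on ',' (44) and integer_string_finisher[44] is SUCCESS; for "123.5"
+// it rests on '.' (46), which maps to INCORRECT_TYPE so the caller can retry the
+// value as a double; for "123x" it rests on 'x' (120), which maps to NUMBER_ERROR.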
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
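+  // (Editor's aside, not part of the upstream source.) Worked example of the 20-digit
+  // overflow test used by these parse_unsigned overloads: "18446744073709551615"
+  // (UINT64_MAX) accumulates exactly and, being greater than INT64_MAX, passes the
+  // check; "18446744073709551616" wraps modulo 2^64 to 0, and since it starts with
+  // '1' and 0 <= INT64_MAX it is rejected with INCORRECT_TYPE.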
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
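+  // (Editor's aside, not part of the upstream source.) On the `~i+1` used by these
+  // parse_integer overloads: it is the two's-complement negation of the unsigned
+  // accumulator. For "-9223372036854775808" the digits accumulate to i = 2^63,
+  // which the range check admits because negative is true (2^63 == INT64_MAX + 1),
+  // and ~i+1 is again 2^63, whose bit pattern read as an int64_t is INT64_MIN.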
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
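+  // (Editor's aside, not part of the upstream source.) Example of the signed/unsigned
+  // split a few lines above: for "9223372036854775807" (INT64_MAX, 19 digits) the
+  // memcmp against "9223372036854775808" is negative, so it is classified as a signed
+  // integer; for "9223372036854775808" the memcmp is zero and it is classified as
+  // unsigned; any non-negative 20-digit integer is classified as unsigned outright.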
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +/* including simdjson/westmere/implementation.h: #include */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + 
simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ + +/* including simdjson/westmere/begin.h: #include */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
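+  // (Editor's aside, not part of the upstream source.) For example, for input_num == 1
+  // _BitScanReverse64 reports the highest set bit at index 0, so this branch returns
+  // 63 - 0 = 63 leading zeroes; for input_num == 2^63 it reports index 63 and the
+  // function returns 0, matching __builtin_clzll on the non-MSVC path.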
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
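+  // (Editor's aside, not part of the upstream source.) Rough trace for the input
+  // "12345678": subtracting '0' leaves bytes 1..8; _mm_maddubs_epi16 with the
+  // (10,1,...) weights pairs them into 16-bit values 12, 34, 56, 78;
+  // _mm_madd_epi16 with (100,1,...) combines those into 32-bit 1234 and 5678;
+  // after packing, the final _mm_madd_epi16 with (10000,1,...) yields
+  // 1234 * 10000 + 5678 = 12345678 in the low 32-bit lane.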
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ 
other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
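+    // Worked example: with mask = 0x0005 (bits 0 and 2 set), input bytes 0 and 2 are
+    // dropped and the surviving 14 bytes land contiguously at output[0..13]; a full
+    // 16 bytes are still stored, so the destination must have room for all of them.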
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, 
*this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { 
return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return 
*static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + 
simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
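+// For example, if src begins with 'a', '\', '"', then copy_and_find() sets bit 1 of
+// bs_bits and bit 2 of quote_bits, so backslash_index() == 1, quote_index() == 2 and
+// has_quote_first() is false because the backslash precedes the quote.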
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including generic/amalgamated.h for westmere: #include */ +/* begin file generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_SRC_GENERIC_DEPENDENCIES_H) +#error generic/dependencies.h must be included before generic/amalgamated.h! +#endif + +/* including generic/base.h for westmere: #include */ +/* begin file generic/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +struct json_character_block; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_BASE_H +/* end file generic/base.h for westmere */ +/* including generic/dom_parser_implementation.h for westmere: #include */ +/* begin file generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// Interface a dom parser implementation must fulfill +namespace simdjson { +namespace westmere { +namespace { + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3); +simdjson_inline bool is_ascii(const simd8x64& input); + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file generic/dom_parser_implementation.h for westmere */ +/* including generic/json_character_block.h for westmere: #include */ 
+/* begin file generic/json_character_block.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +struct json_character_block { + static simdjson_inline json_character_block classify(const simd::simd8x64& in); + + simdjson_inline uint64_t whitespace() const noexcept { return _whitespace; } + simdjson_inline uint64_t op() const noexcept { return _op; } + simdjson_inline uint64_t scalar() const noexcept { return ~(op() | whitespace()); } + + uint64_t _whitespace; + uint64_t _op; +}; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_JSON_CHARACTER_BLOCK_H +/* end file generic/json_character_block.h for westmere */ +/* end file generic/amalgamated.h for westmere */ +/* including generic/stage1/amalgamated.h for westmere: #include */ +/* begin file generic/stage1/amalgamated.h for westmere */ +// Stuff other things depend on +/* including generic/stage1/base.h for westmere: #include */ +/* begin file generic/stage1/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer; +template +struct buf_block_reader; +struct json_block; +class json_minifier; +class json_scanner; +struct json_string_block; +class json_string_scanner; +class json_structural_indexer; + +} // namespace stage1 + +namespace utf8_validation { +struct utf8_checker; +} // namespace utf8_validation + +using utf8_validation::utf8_checker; + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BASE_H +/* end file generic/stage1/base.h for westmere */ +/* including generic/stage1/buf_block_reader.h for westmere: #include */ +/* begin file generic/stage1/buf_block_reader.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +// Walks through a buffer in block-sized increments, loading the last part with spaces +template +struct buf_block_reader { +public: + simdjson_inline buf_block_reader(const uint8_t *_buf, size_t _len); + simdjson_inline size_t block_index(); + simdjson_inline bool has_full_block() const; + simdjson_inline const uint8_t *full_block() const; + /** + * Get the last block, padded with spaces. + * + * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this + * function fills the buffer with spaces and returns 0. 
In particular, if len == STEP_SIZE there + * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding. + * + * @return the number of effective characters in the last block. + */ + simdjson_inline size_t get_remainder(uint8_t *dst) const; + simdjson_inline void advance(); +private: + const uint8_t *buf; + const size_t len; + const size_t lenminusstep; + size_t idx; +}; + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text_64(const uint8_t *text) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i); i++) { + buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]); + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +// Routines to print masks and text for debugging bitmask operations +simdjson_unused static char * format_input_text(const simd8x64& in) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] < ' ') { buf[i] = '_'; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_input_text(const simd8x64& in, uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + in.store(reinterpret_cast(buf)); + for (size_t i=0; i); i++) { + if (buf[i] <= ' ') { buf[i] = '_'; } + if (!(mask & (size_t(1) << i))) { buf[i] = ' '; } + } + buf[sizeof(simd8x64)] = '\0'; + return buf; +} + +simdjson_unused static char * format_mask(uint64_t mask) { + static char buf[sizeof(simd8x64) + 1]; + for (size_t i=0; i<64; i++) { + buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' '; + } + buf[64] = '\0'; + return buf; +} + +template +simdjson_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {} + +template +simdjson_inline size_t buf_block_reader::block_index() { return idx; } + +template +simdjson_inline bool buf_block_reader::has_full_block() const { + return idx < lenminusstep; +} + +template +simdjson_inline const uint8_t *buf_block_reader::full_block() const { + return &buf[idx]; +} + +template +simdjson_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const { + if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers + std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once. + std::memcpy(dst, buf + idx, len - idx); + return len - idx; +} + +template +simdjson_inline void buf_block_reader::advance() { + idx += STEP_SIZE; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_BUF_BLOCK_READER_H +/* end file generic/stage1/buf_block_reader.h for westmere */ +/* including generic/stage1/json_escape_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_escape_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_ESCAPE_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Scans for escape characters in JSON, taking care with multiple backslashes (\\n vs. 
\n). + */ +struct json_escape_scanner { + /** The actual escape characters (the backslashes themselves). */ + uint64_t next_is_escaped = 0ULL; + + struct escaped_and_escape { + /** + * Mask of escaped characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 0100100010100101000 + * n \ \ n \ \ + * ``` + */ + uint64_t escaped; + /** + * Mask of escape characters. + * + * ``` + * \n \\n \\\n \\\\n \ + * 1001000101001010001 + * \ \ \ \ \ \ \ + * ``` + */ + uint64_t escape; + }; + + /** + * Get a mask of both escape and escaped characters (the characters following a backslash). + * + * @param potential_escape A mask of the character that can escape others (but could be + * escaped itself). e.g. block.eq('\\') + */ + simdjson_really_inline escaped_and_escape next(uint64_t backslash) noexcept { + +#if !SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT + if (!backslash) { return {next_escaped_without_backslashes(), 0}; } +#endif + + // | | Mask (shows characters instead of 1's) | Depth | Instructions | + // |--------------------------------|----------------------------------------|-------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | | | + // | | ` even odd even odd odd` | | | + // | potential_escape | ` \ \\\ \\\ \\\\ \\\\ \\\` | 1 | 1 (backslash & ~first_is_escaped) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 5 | 5 (next_escape_and_terminal_code()) + // | escaped | `\ \ n \ n \ \ \ \ \ ` X | 6 | 7 (escape_and_terminal_code ^ (potential_escape | first_is_escaped)) + // | escape | ` \ \ \ \ \ \ \ \ \ \` | 6 | 8 (escape_and_terminal_code & backslash) + // | first_is_escaped | `\ ` | 7 (*) | 9 (escape >> 63) () + // (*) this is not needed until the next iteration + uint64_t escape_and_terminal_code = next_escape_and_terminal_code(backslash & ~this->next_is_escaped); + uint64_t escaped = escape_and_terminal_code ^ (backslash | this->next_is_escaped); + uint64_t escape = escape_and_terminal_code & backslash; + this->next_is_escaped = escape >> 63; + return {escaped, escape}; + } + +private: + static constexpr const uint64_t ODD_BITS = 0xAAAAAAAAAAAAAAAAULL; + + simdjson_really_inline uint64_t next_escaped_without_backslashes() noexcept { + uint64_t escaped = this->next_is_escaped; + this->next_is_escaped = 0; + return escaped; + } + + /** + * Returns a mask of the next escape characters (masking out escaped backslashes), along with + * any non-backslash escape codes. + * + * \n \\n \\\n \\\\n returns: + * \n \ \ \n \ \ + * 11 100 1011 10100 + * + * You are expected to mask out the first bit yourself if the previous block had a trailing + * escape. + * + * & the result with potential_escape to get just the escape characters. + * ^ the result with (potential_escape | first_is_escaped) to get escaped characters. + */ + static simdjson_really_inline uint64_t next_escape_and_terminal_code(uint64_t potential_escape) noexcept { + // If we were to just shift and mask out any odd bits, we'd actually get a *half* right answer: + // any even-aligned backslash runs would be correct! Odd-aligned backslash runs would be + // inverted (\\\ would be 010 instead of 101). + // + // ``` + // string: | ____\\\\_\\\\_____ | + // maybe_escaped | ODD | \ \ \ \ | + // even-aligned ^^^ ^^^^ odd-aligned + // ``` + // + // Taking that into account, our basic strategy is: + // + // 1. Use subtraction to produce a mask with 1's for even-aligned runs and 0's for + // odd-aligned runs. + // 2. 
XOR all odd bits, which masks out the odd bits in even-aligned runs, and brings IN the + // odd bits in odd-aligned runs. + // 3. & with backslash to clean up any stray bits. + // runs are set to 0, and then XORing with "odd": + // + // | | Mask (shows characters instead of 1's) | Instructions | + // |--------------------------------|----------------------------------------|---------------------| + // | string | `\\n_\\\n___\\\n___\\\\___\\\\__\\\` | + // | | ` even odd even odd odd` | + // | maybe_escaped | ` n \\n \\n \\\_ \\\_ \\` X | 1 (potential_escape << 1) + // | maybe_escaped_and_odd | ` \n_ \\n _ \\\n_ _ \\\__ _\\\_ \\\` | 1 (maybe_escaped | odd) + // | even_series_codes_and_odd | ` n_\\\ _ n_ _\\\\ _ _ ` | 1 (maybe_escaped_and_odd - potential_escape) + // | escape_and_terminal_code | ` \n \ \n \ \n \ \ \ \ \ \` | 1 (^ odd) + // + + // Escaped characters are characters following an escape. + uint64_t maybe_escaped = potential_escape << 1; + + // To distinguish odd from even escape sequences, therefore, we turn on any *starting* + // escapes that are on an odd byte. (We actually bring in all odd bits, for speed.) + // - Odd runs of backslashes are 0000, and the code at the end ("n" in \n or \\n) is 1. + // - Odd runs of backslashes are 1111, and the code at the end ("n" in \n or \\n) is 0. + // - All other odd bytes are 1, and even bytes are 0. + uint64_t maybe_escaped_and_odd_bits = maybe_escaped | ODD_BITS; + uint64_t even_series_codes_and_odd_bits = maybe_escaped_and_odd_bits - potential_escape; + + // Now we flip all odd bytes back with xor. This: + // - Makes odd runs of backslashes go from 0000 to 1010 + // - Makes even runs of backslashes go from 1111 to 1010 + // - Sets actually-escaped codes to 1 (the n in \n and \\n: \n = 11, \\n = 100) + // - Resets all other bytes to 0 + return even_series_codes_and_odd_bits ^ ODD_BITS; + } +}; + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_escape_scanner.h for westmere */ +/* including generic/stage1/json_string_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_string_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +struct json_string_block { + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_really_inline json_string_block(uint64_t escaped, uint64_t quote, uint64_t in_string) : + _escaped(escaped), _quote(quote), _in_string(in_string) {} + + // Escaped characters (characters following an escape() character) + simdjson_really_inline uint64_t escaped() const { return _escaped; } + // Real (non-backslashed) quotes + simdjson_really_inline uint64_t quote() const { return _quote; } + // Only characters inside the string (not including the quotes) + simdjson_really_inline uint64_t string_content() const { return _in_string & ~_quote; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + 
simdjson_really_inline uint64_t non_quote_inside_string(uint64_t mask) const { return mask & _in_string; } + // Return a mask of whether the given characters are inside a string (only works on non-quotes) + simdjson_really_inline uint64_t non_quote_outside_string(uint64_t mask) const { return mask & ~_in_string; } + // Tail of string (everything except the start quote) + simdjson_really_inline uint64_t string_tail() const { return _in_string ^ _quote; } + + // escaped characters (backslashed--does not include the hex characters after \u) + uint64_t _escaped; + // real quotes (non-escaped ones) + uint64_t _quote; + // string characters (includes start quote but not end quote) + uint64_t _in_string; +}; + +// Scans blocks for string characters, storing the state necessary to do so +class json_string_scanner { +public: + simdjson_really_inline json_string_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_really_inline error_code finish(); + +private: + // Scans for escape characters + json_escape_scanner escape_scanner{}; + // Whether the last iteration was still inside a string (all 1's = true, all 0's = false). + uint64_t prev_in_string = 0ULL; +}; + +// +// Return a mask of all string characters plus end quotes. +// +// prev_escaped is overflow saying whether the next character is escaped. +// prev_in_string is overflow saying whether we're still in a string. +// +// Backslash sequences outside of quotes will be detected in stage 2. +// +simdjson_really_inline json_string_block json_string_scanner::next(const simd::simd8x64& in) { + const uint64_t backslash = in.eq('\\'); + const uint64_t escaped = escape_scanner.next(backslash).escaped; + const uint64_t quote = in.eq('"') & ~escaped; + + // + // prefix_xor flips on bits inside the string (and flips off the end quote). + // + // Then we xor with prev_in_string: if we were in a string already, its effect is flipped + // (characters inside strings are outside, and characters outside strings are inside). + // + const uint64_t in_string = prefix_xor(quote) ^ prev_in_string; + + // + // Check if we're still in a string at the end of the box so the next block will know + // + prev_in_string = uint64_t(static_cast(in_string) >> 63); + + // Use ^ to turn the beginning quote off, and the end quote on. + + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. 
+ return json_string_block(escaped, quote, in_string); +} + +simdjson_really_inline error_code json_string_scanner::finish() { + if (prev_in_string) { + return UNCLOSED_STRING; + } + return SUCCESS; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRING_SCANNER_H +/* end file generic/stage1/json_string_scanner.h for westmere */ +/* including generic/stage1/utf8_lookup4_algorithm.h for westmere: #include */ +/* begin file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace utf8_validation { + +using namespace simd; + + simdjson_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) { +// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII) +// Bit 1 = Too Long (ASCII followed by continuation) +// Bit 2 = Overlong 3-byte +// Bit 4 = Surrogate +// Bit 5 = Overlong 2-byte +// Bit 7 = Two Continuations + constexpr const uint8_t TOO_SHORT = 1<<0; // 11______ 0_______ + // 11______ 11______ + constexpr const uint8_t TOO_LONG = 1<<1; // 0_______ 10______ + constexpr const uint8_t OVERLONG_3 = 1<<2; // 11100000 100_____ + constexpr const uint8_t SURROGATE = 1<<4; // 11101101 101_____ + constexpr const uint8_t OVERLONG_2 = 1<<5; // 1100000_ 10______ + constexpr const uint8_t TWO_CONTS = 1<<7; // 10______ 10______ + constexpr const uint8_t TOO_LARGE = 1<<3; // 11110100 1001____ + // 11110100 101_____ + // 11110101 1001____ + // 11110101 101_____ + // 1111011_ 1001____ + // 1111011_ 101_____ + // 11111___ 1001____ + // 11111___ 101_____ + constexpr const uint8_t TOO_LARGE_1000 = 1<<6; + // 11110101 1000____ + // 1111011_ 1000____ + // 11111___ 1000____ + constexpr const uint8_t OVERLONG_4 = 1<<6; // 11110000 1000____ + + const simd8 byte_1_high = prev1.shr<4>().lookup_16( + // 0_______ ________ + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, + // 10______ ________ + TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS, + // 1100____ ________ + TOO_SHORT | OVERLONG_2, + // 1101____ ________ + TOO_SHORT, + // 1110____ ________ + TOO_SHORT | OVERLONG_3 | SURROGATE, + // 1111____ ________ + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4 + ); + constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 . 
+ const simd8 byte_1_low = (prev1 & 0x0F).lookup_16( + // ____0000 ________ + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + // ____0001 ________ + CARRY | OVERLONG_2, + // ____001_ ________ + CARRY, + CARRY, + + // ____0100 ________ + CARRY | TOO_LARGE, + // ____0101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____011_ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + + // ____1___ ________ + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + // ____1101 ________ + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 + ); + const simd8 byte_2_high = input.shr<4>().lookup_16( + // ________ 0_______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, + + // ________ 1000____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + // ________ 1001____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + // ________ 101_____ + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + + // ________ 11______ + TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT + ); + return (byte_1_high & byte_1_low & byte_2_high); + } + simdjson_inline simd8 check_multibyte_lengths(const simd8 input, + const simd8 prev_input, const simd8 sc) { + simd8 prev2 = input.prev<2>(prev_input); + simd8 prev3 = input.prev<3>(prev_input); + simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3)); + simd8 must23_80 = must23 & uint8_t(0x80); + return must23_80 ^ sc; + } + + // + // Return nonzero if there are incomplete multibyte characters at the end of the block: + // e.g. if there is a 4-byte character, but it's 3 bytes from the end. + // + simdjson_inline simd8 is_incomplete(const simd8 input) { + // If the previous input's last 3 bytes match this, they're too short (they ended at EOF): + // ... 1111____ 111_____ 11______ +#if SIMDJSON_IMPLEMENTATION_ICELAKE + static const uint8_t max_array[64] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#else + static const uint8_t max_array[32] = { + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 0xf0u-1, 0xe0u-1, 0xc0u-1 + }; +#endif + const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]); + return input.gt_bits(max_value); + } + + struct utf8_checker { + // If this is nonzero, there has been a UTF-8 error. + simd8 error; + // The last input we received + simd8 prev_input_block; + // Whether the last input we received was incomplete (used for ASCII fast path) + simd8 prev_incomplete; + + // + // Check whether the current bytes are valid UTF-8. 
+ // + simdjson_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) { + // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes + // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers) + simd8 prev1 = input.prev<1>(prev_input); + simd8 sc = check_special_cases(input, prev1); + this->error |= check_multibyte_lengths(input, prev_input, sc); + } + + // The only problem that can happen at EOF is that a multibyte character is too short + // or a byte value too large in the last bytes: check_special_cases only checks for bytes + // too large in the first of two bytes. + simdjson_inline void check_eof() { + // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't + // possibly finish them. + this->error |= this->prev_incomplete; + } + +#ifndef SIMDJSON_IF_CONSTEXPR +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_IF_CONSTEXPR if constexpr +#else +#define SIMDJSON_IF_CONSTEXPR if +#endif +#endif + + simdjson_inline void check_next_input(const simd8x64& input) { + if(simdjson_likely(is_ascii(input))) { + this->error |= this->prev_incomplete; + } else { + // you might think that a for-loop would work, but under Visual Studio, it is not good enough. + static_assert((simd8x64::NUM_CHUNKS == 1) + ||(simd8x64::NUM_CHUNKS == 2) + || (simd8x64::NUM_CHUNKS == 4), + "We support one, two or four chunks per 64-byte block."); + SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 1) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 2) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + } else SIMDJSON_IF_CONSTEXPR (simd8x64::NUM_CHUNKS == 4) { + this->check_utf8_bytes(input.chunks[0], this->prev_input_block); + this->check_utf8_bytes(input.chunks[1], input.chunks[0]); + this->check_utf8_bytes(input.chunks[2], input.chunks[1]); + this->check_utf8_bytes(input.chunks[3], input.chunks[2]); + } + this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]); + this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1]; + } + } + // do not forget to call check_eof! + simdjson_inline error_code errors() { + return this->error.any_bits_set_anywhere() ? error_code::UTF8_ERROR : error_code::SUCCESS; + } + + }; // struct utf8_checker +} // namespace utf8_validation + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_LOOKUP4_ALGORITHM_H +/* end file generic/stage1/utf8_lookup4_algorithm.h for westmere */ +/* including generic/stage1/json_scanner.h for westmere: #include */ +/* begin file generic/stage1/json_scanner.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * A block of scanned json, with information on operators and scalars. + * + * We seek to identify pseudo-structural characters. 
Anything that is inside + * a string must be omitted (hence & ~_string.string_tail()). + * Otherwise, pseudo-structural characters come in two forms. + * 1. We have the structural characters ([,],{,},:, comma). The + * term 'structural character' is from the JSON RFC. + * 2. We have the 'scalar pseudo-structural characters'. + * Scalars are quotes, and any character except structural characters and white space. + * + * To identify the scalar pseudo-structural characters, we must look at what comes + * before them: it must be a space, a quote or a structural characters. + * Starting with simdjson v0.3, we identify them by + * negation: we identify everything that is followed by a non-quote scalar, + * and we negate that. Whatever remains must be a 'scalar pseudo-structural character'. + */ +struct json_block { +public: + // We spell out the constructors in the hope of resolving inlining issues with Visual Studio 2017 + simdjson_inline json_block(json_string_block&& string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(std::move(string)), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + simdjson_inline json_block(json_string_block string, json_character_block characters, uint64_t follows_potential_nonquote_scalar) : + _string(string), _characters(characters), _follows_potential_nonquote_scalar(follows_potential_nonquote_scalar) {} + + /** + * The start of structurals. + * In simdjson prior to v0.3, these were called the pseudo-structural characters. + **/ + simdjson_inline uint64_t structural_start() const noexcept { return potential_structural_start() & ~_string.string_tail(); } + /** All JSON whitespace (i.e. not in a string) */ + simdjson_inline uint64_t whitespace() const noexcept { return non_quote_outside_string(_characters.whitespace()); } + + // Helpers + + /** Whether the given characters are inside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_inside_string(uint64_t mask) const noexcept { return _string.non_quote_inside_string(mask); } + /** Whether the given characters are outside a string (only works on non-quotes) */ + simdjson_inline uint64_t non_quote_outside_string(uint64_t mask) const noexcept { return _string.non_quote_outside_string(mask); } + + // string and escape characters + json_string_block _string; + // whitespace, structural characters ('operators'), scalars + json_character_block _characters; + // whether the previous character was a scalar + uint64_t _follows_potential_nonquote_scalar; +private: + // Potential structurals (i.e. disregarding strings) + + /** + * structural elements ([,],{,},:, comma) plus scalar starts like 123, true and "abc". + * They may reside inside a string. + **/ + simdjson_inline uint64_t potential_structural_start() const noexcept { return _characters.op() | potential_scalar_start(); } + /** + * The start of non-operator runs, like 123, true and "abc". + * It main reside inside a string. + **/ + simdjson_inline uint64_t potential_scalar_start() const noexcept { + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // Whenever it is preceded by something that is not a structural element ({,},[,],:, ") nor a white-space + // then we know that it is irrelevant structurally. + return _characters.scalar() & ~follows_potential_scalar(); + } + /** + * Whether the given character is immediately after a non-operator like 123, true. 
+ * The characters following a quote are not included. + */ + simdjson_inline uint64_t follows_potential_scalar() const noexcept { + // _follows_potential_nonquote_scalar: is defined as marking any character that follows a character + // that is not a structural element ({,},[,],:, comma) nor a quote (") and that is not a + // white space. + // It is understood that within quoted region, anything at all could be marked (irrelevant). + return _follows_potential_nonquote_scalar; + } +}; + +/** + * Scans JSON for important bits: structural characters or 'operators', strings, and scalars. + * + * The scanner starts by calculating two distinct things: + * - string characters (taking \" into account) + * - structural characters or 'operators' ([]{},:, comma) + * and scalars (runs of non-operators like 123, true and "abc") + * + * To minimize data dependency (a key component of the scanner's speed), it finds these in parallel: + * in particular, the operator/scalar bit will find plenty of things that are actually part of + * strings. When we're done, json_block will fuse the two together by masking out tokens that are + * part of a string. + */ +class json_scanner { +public: + json_scanner() = default; + simdjson_inline json_block next(const simd::simd8x64& in); + // Returns either UNCLOSED_STRING or SUCCESS + simdjson_inline error_code finish(); + +private: + // Whether the last character of the previous iteration is part of a scalar token + // (anything except whitespace or a structural character/'operator'). + uint64_t prev_scalar = 0ULL; + json_string_scanner string_scanner{}; +}; + + +// +// Check if the current character immediately follows a matching character. +// +// For example, this checks for quotes with backslashes in front of them: +// +// const uint64_t backslashed_quote = in.eq('"') & immediately_follows(in.eq('\'), prev_backslash); +// +simdjson_inline uint64_t follows(const uint64_t match, uint64_t &overflow) { + const uint64_t result = match << 1 | overflow; + overflow = match >> 63; + return result; +} + +simdjson_inline json_block json_scanner::next(const simd::simd8x64& in) { + json_string_block strings = string_scanner.next(in); + // identifies the white-space and the structural characters + json_character_block characters = json_character_block::classify(in); + // The term "scalar" refers to anything except structural characters and white space + // (so letters, numbers, quotes). + // We want follows_scalar to mark anything that follows a non-quote scalar (so letters and numbers). + // + // A terminal quote should either be followed by a structural character (comma, brace, bracket, colon) + // or nothing. However, we still want ' "a string"true ' to mark the 't' of 'true' as a potential + // pseudo-structural character just like we would if we had ' "a string" true '; otherwise we + // may need to add an extra check when parsing strings. + // + // Performance: there are many ways to skin this cat. + const uint64_t nonquote_scalar = characters.scalar() & ~strings.quote(); + uint64_t follows_nonquote_scalar = follows(nonquote_scalar, prev_scalar); + // We are returning a function-local object so either we get a move constructor + // or we get copy elision. + return json_block( + strings,// strings is a function-local object so either it moves or the copy is elided. 
+ characters, + follows_nonquote_scalar + ); +} + +simdjson_inline error_code json_scanner::finish() { + return string_scanner.finish(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_SCANNER_H +/* end file generic/stage1/json_scanner.h for westmere */ + +// All other declarations +/* including generic/stage1/find_next_document_index.h for westmere: #include */ +/* begin file generic/stage1/find_next_document_index.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * This algorithm is used to quickly identify the last structural position that + * makes up a complete document. + * + * It does this by going backwards and finding the last *document boundary* (a + * place where one value follows another without a comma between them). If the + * last document (the characters after the boundary) has an equal number of + * start and end brackets, it is considered complete. + * + * Simply put, we iterate over the structural characters, starting from + * the end. We consider that we found the end of a JSON document when the + * first element of the pair is NOT one of these characters: '{' '[' ':' ',' + * and when the second element is NOT one of these characters: '}' ']' ':' ','. + * + * This simple comparison works most of the time, but it does not cover cases + * where the batch's structural indexes contain a perfect amount of documents. + * In such a case, we do not have access to the structural index which follows + * the last document, therefore, we do not have access to the second element in + * the pair, and that means we cannot identify the last document. To fix this + * issue, we keep a count of the open and closed curly/square braces we found + * while searching for the pair. When we find a pair AND the count of open and + * closed curly/square braces is the same, we know that we just passed a + * complete document, therefore the last json buffer location is the end of the + * batch. + */ +simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &parser) { + // Variant: do not count separately, just figure out depth + if(parser.n_structural_indexes == 0) { return 0; } + auto arr_cnt = 0; + auto obj_cnt = 0; + for (auto i = parser.n_structural_indexes - 1; i > 0; i--) { + auto idxb = parser.structural_indexes[i]; + switch (parser.buf[idxb]) { + case ':': + case ',': + continue; + case '}': + obj_cnt--; + continue; + case ']': + arr_cnt--; + continue; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + auto idxa = parser.structural_indexes[i - 1]; + switch (parser.buf[idxa]) { + case '{': + case '[': + case ':': + case ',': + continue; + } + // Last document is complete, so the next document will appear after! + if (!arr_cnt && !obj_cnt) { + return parser.n_structural_indexes; + } + // Last document is incomplete; mark the document at i + 1 as the next one + return i; + } + // If we made it to the end, we want to finish counting to see if we have a full document. 
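+ //
+ // Illustrative trace (hypothetical batch): suppose the buffer is {"x":1}{"y": and the eight
+ // structural indexes point at  {  "x"  :  1  }  {  "y"  :  in order. Walking backwards, the
+ // trailing ':' and the key are skipped; at the second '{' we have obj_cnt == 1 while the
+ // preceding structural is '}', so a boundary pair was found with a non-zero count: the last
+ // document is incomplete and the function returns 5, keeping only the structurals of {"x":1}.
+ // The switch below handles the remaining case where the loop reached the very first structural.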
+ switch (parser.buf[parser.structural_indexes[0]]) { + case '}': + obj_cnt--; + break; + case ']': + arr_cnt--; + break; + case '{': + obj_cnt++; + break; + case '[': + arr_cnt++; + break; + } + if (!arr_cnt && !obj_cnt) { + // We have a complete document. + return parser.n_structural_indexes; + } + return 0; +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_FIND_NEXT_DOCUMENT_INDEX_H +/* end file generic/stage1/find_next_document_index.h for westmere */ +/* including generic/stage1/json_minifier.h for westmere: #include */ +/* begin file generic/stage1/json_minifier.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class json_minifier { +public: + template + static error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept; + +private: + simdjson_inline json_minifier(uint8_t *_dst) + : dst{_dst} + {} + template + simdjson_inline void step(const uint8_t *block_buf, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block); + simdjson_inline error_code finish(uint8_t *dst_start, size_t &dst_len); + json_scanner scanner{}; + uint8_t *dst; +}; + +simdjson_inline void json_minifier::next(const simd::simd8x64& in, const json_block& block) { + uint64_t mask = block.whitespace(); + dst += in.compress(mask, dst); +} + +simdjson_inline error_code json_minifier::finish(uint8_t *dst_start, size_t &dst_len) { + error_code error = scanner.finish(); + if (error) { dst_len = 0; return error; } + dst_len = dst - dst_start; + return SUCCESS; +} + +template<> +simdjson_inline void json_minifier::step<128>(const uint8_t *block_buf, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + simd::simd8x64 in_2(block_buf+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1); + this->next(in_2, block_2); + reader.advance(); +} + +template<> +simdjson_inline void json_minifier::step<64>(const uint8_t *block_buf, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block_buf); + json_block block_1 = scanner.next(in_1); + this->next(block_buf, block_1); + reader.advance(); +} + +template +error_code json_minifier::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) noexcept { + buf_block_reader reader(buf, len); + json_minifier minifier(dst); + + // Index the first n-1 blocks + while (reader.has_full_block()) { + minifier.step(reader.full_block(), reader); + } + + // Index the last (remainder) block, padded with spaces + uint8_t block[STEP_SIZE]; + size_t remaining_bytes = reader.get_remainder(block); + if (remaining_bytes > 0) { + // We do not want to 
write directly to the output stream. Rather, we write + // to a local buffer (for safety). + uint8_t out_block[STEP_SIZE]; + uint8_t * const guarded_dst{minifier.dst}; + minifier.dst = out_block; + minifier.step(block, reader); + size_t to_write = minifier.dst - out_block; + // In some cases, we could be enticed to consider the padded spaces + // as part of the string. This is fine as long as we do not write more + // than we consumed. + if(to_write > remaining_bytes) { to_write = remaining_bytes; } + memcpy(guarded_dst, out_block, to_write); + minifier.dst = guarded_dst + to_write; + } + return minifier.finish(dst, dst_len); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_MINIFIER_H +/* end file generic/stage1/json_minifier.h for westmere */ +/* including generic/stage1/json_structural_indexer.h for westmere: #include */ +/* begin file generic/stage1/json_structural_indexer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses in stage1 +// It is intended to be included multiple times and compiled multiple times +// We assume the file in which it is included already includes +// "simdjson/stage1.h" (this simplifies amalgation) + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +class bit_indexer { +public: + uint32_t *tail; + + simdjson_inline bit_indexer(uint32_t *index_buf) : tail(index_buf) {} + + // flatten out values in 'bits' assuming that they are are to have values of idx + // plus their position in the bitvector, and store these indexes at + // base_ptr[base] incrementing base as we go + // will potentially store extra values beyond end of valid bits, so base_ptr + // needs to be large enough to handle this + // + // If the kernel sets SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER, then it + // will provide its own version of the code. +#ifdef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + simdjson_inline void write(uint32_t idx, uint64_t bits); +#else + simdjson_inline void write(uint32_t idx, uint64_t bits) { + // In some instances, the next branch is expensive because it is mispredicted. + // Unfortunately, in other cases, + // it helps tremendously. + if (bits == 0) + return; +#if SIMDJSON_PREFER_REVERSE_BITS + /** + * ARM lacks a fast trailing zero instruction, but it has a fast + * bit reversal instruction and a fast leading zero instruction. + * Thus it may be profitable to reverse the bits (once) and then + * to rely on a sequence of instructions that call the leading + * zero instruction. + * + * Performance notes: + * The chosen routine is not optimal in terms of data dependency + * since zero_leading_bit might require two instructions. 
However, + * it tends to minimize the total number of instructions which is + * beneficial. + */ + + uint64_t rev_bits = reverse_bits(bits); + int cnt = static_cast(count_ones(bits)); + int i = 0; + // Do the first 8 all together + for (; i<8; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + i = 8; + for (; i<16; i++) { + int lz = leading_zeroes(rev_bits); + this->tail[i] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + i = 16; + while (rev_bits != 0) { + int lz = leading_zeroes(rev_bits); + this->tail[i++] = static_cast(idx) + lz; + rev_bits = zero_leading_bit(rev_bits, lz); + } + } + } + this->tail += cnt; +#else // SIMDJSON_PREFER_REVERSE_BITS + /** + * Under recent x64 systems, we often have both a fast trailing zero + * instruction and a fast 'clear-lower-bit' instruction so the following + * algorithm can be competitive. + */ + + int cnt = static_cast(count_ones(bits)); + // Do the first 8 all together + for (int i=0; i<8; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Do the next 8 all together (we hope in most cases it won't happen at all + // and the branch is easily predicted). + if (simdjson_unlikely(cnt > 8)) { + for (int i=8; i<16; i++) { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + } + + // Most files don't have 16+ structurals per block, so we take several basically guaranteed + // branch mispredictions here. 16+ structurals per block means either punctuation ({} [] , :) + // or the start of a value ("abc" true 123) every four characters. + if (simdjson_unlikely(cnt > 16)) { + int i = 16; + do { + this->tail[i] = idx + trailing_zeroes(bits); + bits = clear_lowest_bit(bits); + i++; + } while (i < cnt); + } + } + + this->tail += cnt; +#endif + } +#endif // SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +}; + +class json_structural_indexer { +public: + /** + * Find the important bits of JSON in a 128-byte chunk, and add them to structural_indexes. + * + * @param partial Setting the partial parameter to true allows the find_structural_bits to + * tolerate unclosed strings. The caller should still ensure that the input is valid UTF-8. If + * you are processing substrings, you may want to call on a function like trimmed_length_safe_utf8. 
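+ *
+ * (Illustrative call, not taken from this file: a streaming caller might invoke
+ * `index<64>(buf, len, parser, stage1_mode::streaming_partial)` for intermediate batches and
+ * pass `stage1_mode::streaming_final` for the last one; both modes are handled in finish() below.)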
+ */ + template + static error_code index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept; + +private: + simdjson_inline json_structural_indexer(uint32_t *structural_indexes); + template + simdjson_inline void step(const uint8_t *block, buf_block_reader &reader) noexcept; + simdjson_inline void next(const simd::simd8x64& in, const json_block& block, size_t idx); + simdjson_inline error_code finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial); + + json_scanner scanner{}; + utf8_checker checker{}; + bit_indexer indexer; + uint64_t prev_structurals = 0; + uint64_t unescaped_chars_error = 0; +}; + +simdjson_inline json_structural_indexer::json_structural_indexer(uint32_t *structural_indexes) : indexer{structural_indexes} {} + +// Skip the last character if it is partial +simdjson_inline size_t trim_partial_utf8(const uint8_t *buf, size_t len) { + if (simdjson_unlikely(len < 3)) { + switch (len) { + case 2: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 2 bytes left + return len; + case 1: + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + return len; + case 0: + return len; + } + } + if (buf[len-1] >= 0xc0) { return len-1; } // 2-, 3- and 4-byte characters with only 1 byte left + if (buf[len-2] >= 0xe0) { return len-2; } // 3- and 4-byte characters with only 1 byte left + if (buf[len-3] >= 0xf0) { return len-3; } // 4-byte characters with only 3 bytes left + return len; +} + +// +// PERF NOTES: +// We pipe 2 inputs through these stages: +// 1. Load JSON into registers. This takes a long time and is highly parallelizable, so we load +// 2 inputs' worth at once so that by the time step 2 is looking for them input, it's available. +// 2. Scan the JSON for critical data: strings, scalars and operators. This is the critical path. +// The output of step 1 depends entirely on this information. These functions don't quite use +// up enough CPU: the second half of the functions is highly serial, only using 1 execution core +// at a time. The second input's scans has some dependency on the first ones finishing it, but +// they can make a lot of progress before they need that information. +// 3. Step 1 doesn't use enough capacity, so we run some extra stuff while we're waiting for that +// to finish: utf-8 checks and generating the output from the last iteration. +// +// The reason we run 2 inputs at a time, is steps 2 and 3 are *still* not enough to soak up all +// available capacity with just one input. Running 2 at a time seems to give the CPU a good enough +// workout. +// +template +error_code json_structural_indexer::index(const uint8_t *buf, size_t len, dom_parser_implementation &parser, stage1_mode partial) noexcept { + if (simdjson_unlikely(len > parser.capacity())) { return CAPACITY; } + // We guard the rest of the code so that we can assume that len > 0 throughout. + if (len == 0) { return EMPTY; } + if (is_streaming(partial)) { + len = trim_partial_utf8(buf, len); + // If you end up with an empty window after trimming + // the partial UTF-8 bytes, then chances are good that you + // have an UTF-8 formatting error. 
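+ // For instance (illustrative): if a batch ends with the two bytes 0xE2 0x82 -- the first two
+ // bytes of the three-byte encoding of U+20AC -- then buf[len-2] >= 0xE0, so trim_partial_utf8
+ // returns len-2 and both bytes are left to be retried with the next batch. Only when trimming
+ // removes every byte do we report UTF8_ERROR below.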
+ if(len == 0) { return UTF8_ERROR; } + } + buf_block_reader reader(buf, len); + json_structural_indexer indexer(parser.structural_indexes.get()); + + // Read all but the last block + while (reader.has_full_block()) { + indexer.step(reader.full_block(), reader); + } + // Take care of the last block (will always be there unless file is empty which is + // not supposed to happen.) + uint8_t block[STEP_SIZE]; + if (simdjson_unlikely(reader.get_remainder(block) == 0)) { return UNEXPECTED_ERROR; } + indexer.step(block, reader); + return indexer.finish(parser, reader.block_index(), len, partial); +} + +template<> +simdjson_inline void json_structural_indexer::step<128>(const uint8_t *block, buf_block_reader<128> &reader) noexcept { + simd::simd8x64 in_1(block); + simd::simd8x64 in_2(block+64); + json_block block_1 = scanner.next(in_1); + json_block block_2 = scanner.next(in_2); + this->next(in_1, block_1, reader.block_index()); + this->next(in_2, block_2, reader.block_index()+64); + reader.advance(); +} + +template<> +simdjson_inline void json_structural_indexer::step<64>(const uint8_t *block, buf_block_reader<64> &reader) noexcept { + simd::simd8x64 in_1(block); + json_block block_1 = scanner.next(in_1); + this->next(in_1, block_1, reader.block_index()); + reader.advance(); +} + +simdjson_inline void json_structural_indexer::next(const simd::simd8x64& in, const json_block& block, size_t idx) { + uint64_t unescaped = in.lteq(0x1F); +#if SIMDJSON_UTF8VALIDATION + checker.check_next_input(in); +#endif + indexer.write(uint32_t(idx-64), prev_structurals); // Output *last* iteration's structurals to the parser + prev_structurals = block.structural_start(); + unescaped_chars_error |= block.non_quote_inside_string(unescaped); +} + +simdjson_inline error_code json_structural_indexer::finish(dom_parser_implementation &parser, size_t idx, size_t len, stage1_mode partial) { + // Write out the final iteration's structurals + indexer.write(uint32_t(idx-64), prev_structurals); + error_code error = scanner.finish(); + // We deliberately break down the next expression so that it is + // human readable. + const bool should_we_exit = is_streaming(partial) ? + ((error != SUCCESS) && (error != UNCLOSED_STRING)) // when partial we tolerate UNCLOSED_STRING + : (error != SUCCESS); // if partial is false, we must have SUCCESS + const bool have_unclosed_string = (error == UNCLOSED_STRING); + if (simdjson_unlikely(should_we_exit)) { return error; } + + if (unescaped_chars_error) { + return UNESCAPED_CHARS; + } + parser.n_structural_indexes = uint32_t(indexer.tail - parser.structural_indexes.get()); + /*** + * The On Demand API requires special padding. + * + * This is related to https://github.com/simdjson/simdjson/issues/906 + * Basically, we want to make sure that if the parsing continues beyond the last (valid) + * structural character, it quickly stops. + * Only three structural characters can be repeated without triggering an error in JSON: [,] and }. + * We repeat the padding character (at 'len'). We don't know what it is, but if the parsing + * continues, then it must be [,] or }. + * Suppose it is ] or }. We backtrack to the first character, what could it be that would + * not trigger an error? It could be ] or } but no, because you can't start a document that way. + * It can't be a comma, a colon or any simple value. So the only way we could continue is + * if the repeated character is [. But if so, the document must start with [. But if the document + * starts with [, it should end with ]. 
If we enforce that rule, then we would get + * ][[ which is invalid. + * + * This is illustrated with the test array_iterate_unclosed_error() on the following input: + * R"({ "a": [,,)" + **/ + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); // used later in partial == stage1_mode::streaming_final + parser.structural_indexes[parser.n_structural_indexes + 1] = uint32_t(len); + parser.structural_indexes[parser.n_structural_indexes + 2] = 0; + parser.next_structural_index = 0; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + return EMPTY; + } + if (simdjson_unlikely(parser.structural_indexes[parser.n_structural_indexes - 1] > len)) { + return UNEXPECTED_ERROR; + } + if (partial == stage1_mode::streaming_partial) { + // If we have an unclosed string, then the last structural + // will be the quote and we want to make sure to omit it. + if(have_unclosed_string) { + parser.n_structural_indexes--; + // a valid JSON file cannot have zero structural indexes - we should have found something + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { return CAPACITY; } + } + // We truncate the input to the end of the last complete document (or zero). + auto new_structural_indexes = find_next_document_index(parser); + if (new_structural_indexes == 0 && parser.n_structural_indexes > 0) { + if(parser.structural_indexes[0] == 0) { + // If the buffer is partial and we started at index 0 but the document is + // incomplete, it's too big to parse. + return CAPACITY; + } else { + // It is possible that the document could be parsed, we just had a lot + // of white space. + parser.n_structural_indexes = 0; + return EMPTY; + } + } + + parser.n_structural_indexes = new_structural_indexes; + } else if (partial == stage1_mode::streaming_final) { + if(have_unclosed_string) { parser.n_structural_indexes--; } + // We truncate the input to the end of the last complete document (or zero). + // Because partial == stage1_mode::streaming_final, it means that we may + // silently ignore trailing garbage. Though it sounds bad, we do it + // deliberately because many people who have streams of JSON documents + // will truncate them for processing. E.g., imagine that you are uncompressing + // the data from a size file or receiving it in chunks from the network. You + // may not know where exactly the last document will be. Meanwhile the + // document_stream instances allow people to know the JSON documents they are + // parsing (see the iterator.source() method). + parser.n_structural_indexes = find_next_document_index(parser); + // We store the initial n_structural_indexes so that the client can see + // whether we used truncation. If initial_n_structural_indexes == parser.n_structural_indexes, + // then this will query parser.structural_indexes[parser.n_structural_indexes] which is len, + // otherwise, it will copy some prior index. + parser.structural_indexes[parser.n_structural_indexes + 1] = parser.structural_indexes[parser.n_structural_indexes]; + // This next line is critical, do not change it unless you understand what you are + // doing. + parser.structural_indexes[parser.n_structural_indexes] = uint32_t(len); + if (simdjson_unlikely(parser.n_structural_indexes == 0u)) { + // We tolerate an unclosed string at the very end of the stream. Indeed, users + // often load their data in bulk without being careful and they want us to ignore + // the trailing garbage. 
+ return EMPTY; + } + } + checker.check_eof(); + return checker.errors(); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +// Clear CUSTOM_BIT_INDEXER so other implementations can set it if they need to. +#undef SIMDJSON_GENERIC_JSON_STRUCTURAL_INDEXER_CUSTOM_BIT_INDEXER + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_JSON_STRUCTURAL_INDEXER_H +/* end file generic/stage1/json_structural_indexer.h for westmere */ +/* including generic/stage1/utf8_validator.h for westmere: #include */ +/* begin file generic/stage1/utf8_validator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage1 { + +/** + * Validates that the string is actual UTF-8. + */ +template<class checker> +bool generic_validate_utf8(const uint8_t * input, size_t length) { + checker c{}; + buf_block_reader<64> reader(input, length); + while (reader.has_full_block()) { + simd::simd8x64<uint8_t> in(reader.full_block()); + c.check_next_input(in); + reader.advance(); + } + uint8_t block[64]{}; + reader.get_remainder(block); + simd::simd8x64<uint8_t> in(block); + c.check_next_input(in); + reader.advance(); + c.check_eof(); + return c.errors() == error_code::SUCCESS; +} + +bool generic_validate_utf8(const char * input, size_t length) { + return generic_validate_utf8<utf8_checker>(reinterpret_cast<const uint8_t *>(input),length); +} + +} // namespace stage1 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE1_UTF8_VALIDATOR_H +/* end file generic/stage1/utf8_validator.h for westmere */ +/* end file generic/stage1/amalgamated.h for westmere */ +/* including generic/stage2/amalgamated.h for westmere: #include */ +/* begin file generic/stage2/amalgamated.h for westmere */ +// Stuff other things depend on +/* including generic/stage2/base.h for westmere: #include */ +/* begin file generic/stage2/base.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_BASE_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class json_iterator; +class structural_iterator; +struct tape_builder; +struct tape_writer; + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_BASE_H +/* end file generic/stage2/base.h for westmere */ +/* including generic/stage2/tape_writer.h for westmere: #include */ +/* begin file generic/stage2/tape_writer.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation
skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +struct tape_writer { + /** The next place to write to tape */ + uint64_t *next_tape_loc; + + /** Write a signed 64-bit value to tape. */ + simdjson_inline void append_s64(int64_t value) noexcept; + + /** Write an unsigned 64-bit value to tape. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + + /** Write a double value to tape. */ + simdjson_inline void append_double(double value) noexcept; + + /** + * Append a tape entry (an 8-bit type,and 56 bits worth of value). + */ + simdjson_inline void append(uint64_t val, internal::tape_type t) noexcept; + + /** + * Skip the current tape entry without writing. + * + * Used to skip the start of the container, since we'll come back later to fill it in when the + * container ends. + */ + simdjson_inline void skip() noexcept; + + /** + * Skip the number of tape entries necessary to write a large u64 or i64. + */ + simdjson_inline void skip_large_integer() noexcept; + + /** + * Skip the number of tape entries necessary to write a double. + */ + simdjson_inline void skip_double() noexcept; + + /** + * Write a value to a known location on tape. + * + * Used to go back and write out the start of a container after the container ends. + */ + simdjson_inline static void write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept; + +private: + /** + * Append both the tape entry, and a supplementary value following it. Used for types that need + * all 64 bits, such as double and uint64_t. + */ + template + simdjson_inline void append2(uint64_t val, T val2, internal::tape_type t) noexcept; +}; // struct tape_writer + +simdjson_inline void tape_writer::append_s64(int64_t value) noexcept { + append2(0, value, internal::tape_type::INT64); +} + +simdjson_inline void tape_writer::append_u64(uint64_t value) noexcept { + append(0, internal::tape_type::UINT64); + *next_tape_loc = value; + next_tape_loc++; +} + +/** Write a double value to tape. 
*/ +simdjson_inline void tape_writer::append_double(double value) noexcept { + append2(0, value, internal::tape_type::DOUBLE); +} + +simdjson_inline void tape_writer::skip() noexcept { + next_tape_loc++; +} + +simdjson_inline void tape_writer::skip_large_integer() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::skip_double() noexcept { + next_tape_loc += 2; +} + +simdjson_inline void tape_writer::append(uint64_t val, internal::tape_type t) noexcept { + *next_tape_loc = val | ((uint64_t(char(t))) << 56); + next_tape_loc++; +} + +template +simdjson_inline void tape_writer::append2(uint64_t val, T val2, internal::tape_type t) noexcept { + append(val, t); + static_assert(sizeof(val2) == sizeof(*next_tape_loc), "Type is not 64 bits!"); + memcpy(next_tape_loc, &val2, sizeof(val2)); + next_tape_loc++; +} + +simdjson_inline void tape_writer::write(uint64_t &tape_loc, uint64_t val, internal::tape_type t) noexcept { + tape_loc = val | ((uint64_t(char(t))) << 56); +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_WRITER_H +/* end file generic/stage2/tape_writer.h for westmere */ +/* including generic/stage2/logger.h for westmere: #include */ +/* begin file generic/stage2/logger.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_LOGGER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + + +// This is for an internal-only stage 2 specific logger. +// Set LOG_ENABLED = true to log what stage 2 is doing! +namespace simdjson { +namespace westmere { +namespace { +namespace logger { + + static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + static constexpr const int LOG_EVENT_LEN = 20; + static constexpr const int LOG_BUFFER_LEN = 30; + static constexpr const int LOG_SMALL_BUFFER_LEN = 10; + static constexpr const int LOG_INDEX_LEN = 5; + + static int log_depth; // Not threadsafe. Log only. 
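+ // To actually see this output, define SIMDJSON_VERBOSE_LOGGING when compiling (for example,
+ // -DSIMDJSON_VERBOSE_LOGGING=1), which turns the LOG_ENABLED constant above into true.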
+ + // Helper to turn unprintable or newline characters into spaces + static simdjson_inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } + } + + // Print the header and set up log_start + static simdjson_inline void log_start() { + if (LOG_ENABLED) { + log_depth = 0; + printf("\n"); + printf("| %-*s | %-*s | %-*s | %-*s | Detail |\n", LOG_EVENT_LEN, "Event", LOG_BUFFER_LEN, "Buffer", LOG_SMALL_BUFFER_LEN, "Next", 5, "Next#"); + printf("|%.*s|%.*s|%.*s|%.*s|--------|\n", LOG_EVENT_LEN+2, DASHES, LOG_BUFFER_LEN+2, DASHES, LOG_SMALL_BUFFER_LEN+2, DASHES, 5+2, DASHES); + } + } + + simdjson_unused static simdjson_inline void log_string(const char *message) { + if (LOG_ENABLED) { + printf("%s\n", message); + } + } + + // Logs a single line from the stage 2 DOM parser + template + static simdjson_inline void log_line(S &structurals, const char *title_prefix, const char *title, const char *detail) { + if (LOG_ENABLED) { + printf("| %*s%s%-*s ", log_depth*2, "", title_prefix, LOG_EVENT_LEN - log_depth*2 - int(strlen(title_prefix)), title); + auto current_index = structurals.at_beginning() ? nullptr : structurals.next_structural-1; + auto next_index = structurals.next_structural; + auto current = current_index ? &structurals.buf[*current_index] : reinterpret_cast(" "); + auto next = &structurals.buf[*next_index]; + { + // Print the next N characters in the buffer. + printf("| "); + // Otherwise, print the characters starting from the buffer position. + // Print spaces for unprintable or newline characters. + for (int i=0;i */ +/* begin file generic/stage2/json_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class json_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + uint32_t depth{0}; + + /** + * Walk the JSON document. + * + * The visitor receives callbacks when values are encountered. All callbacks pass the iterator as + * the first parameter; some callbacks have other parameters as well: + * + * - visit_document_start() - at the beginning. + * - visit_document_end() - at the end (if things were successful). + * + * - visit_array_start() - at the start `[` of a non-empty array. + * - visit_array_end() - at the end `]` of a non-empty array. + * - visit_empty_array() - when an empty array is encountered. + * + * - visit_object_end() - at the start `]` of a non-empty object. + * - visit_object_start() - at the end `]` of a non-empty object. + * - visit_empty_object() - when an empty object is encountered. + * - visit_key(const uint8_t *key) - when a key in an object field is encountered. key is + * guaranteed to point at the first quote of the string (`"key"`). + * - visit_primitive(const uint8_t *value) - when a value is a string, number, boolean or null. + * - visit_root_primitive(iter, uint8_t *value) - when the top-level value is a string, number, boolean or null. + * + * - increment_count(iter) - each time a value is found in an array or object. 
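+ *
+ * A minimal sketch of a visitor (illustrative only; the name noop_visitor is hypothetical and a
+ * real visitor must provide every callback listed above):
+ *
+ *   struct noop_visitor {
+ *     simdjson_inline error_code visit_document_start(json_iterator &) noexcept { return SUCCESS; }
+ *     simdjson_inline error_code visit_document_end(json_iterator &) noexcept { return SUCCESS; }
+ *     simdjson_inline error_code visit_key(json_iterator &, const uint8_t *) noexcept { return SUCCESS; }
+ *     // ... and so on for the remaining visit_* callbacks and increment_count() ...
+ *   };
+ *
+ * It would then be driven by constructing a json_iterator(dom_parser, 0) and calling
+ * walk_document<false>(visitor) on it.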
+ */ + template + simdjson_warn_unused simdjson_inline error_code walk_document(V &visitor) noexcept; + + /** + * Create an iterator capable of walking a JSON document. + * + * The document must have already passed through stage 1. + */ + simdjson_inline json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index); + + /** + * Look at the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *peek() const noexcept; + /** + * Advance to the next token. + * + * Tokens can be strings, numbers, booleans, null, or operators (`[{]},:`)). + * + * They may include invalid JSON as well (such as `1.2.3` or `ture`). + */ + simdjson_inline const uint8_t *advance() noexcept; + /** + * Get the remaining length of the document, from the start of the current token. + */ + simdjson_inline size_t remaining_len() const noexcept; + /** + * Check if we are at the end of the document. + * + * If this is true, there are no more tokens. + */ + simdjson_inline bool at_eof() const noexcept; + /** + * Check if we are at the beginning of the document. + */ + simdjson_inline bool at_beginning() const noexcept; + simdjson_inline uint8_t last_structural() const noexcept; + + /** + * Log that a value has been found. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_value(const char *type) const noexcept; + /** + * Log the start of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_start_value(const char *type) const noexcept; + /** + * Log the end of a multipart value. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_end_value(const char *type) const noexcept; + /** + * Log an error. + * + * Set LOG_ENABLED=true in logger.h to see logging. + */ + simdjson_inline void log_error(const char *error) const noexcept; + + template + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(V &visitor, const uint8_t *value) noexcept; + template + simdjson_warn_unused simdjson_inline error_code visit_primitive(V &visitor, const uint8_t *value) noexcept; +}; + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::walk_document(V &visitor) noexcept { + logger::log_start(); + + // + // Start the document + // + if (at_eof()) { return EMPTY; } + log_start_value("document"); + SIMDJSON_TRY( visitor.visit_document_start(*this) ); + + // + // Read first value + // + { + auto value = advance(); + + // Make sure the outer object or array is closed before continuing; otherwise, there are ways we + // could get into memory corruption. 
See https://github.com/simdjson/simdjson/issues/906 + if (!STREAMING) { + switch (*value) { + case '{': if (last_structural() != '}') { log_value("starting brace unmatched"); return TAPE_ERROR; }; break; + case '[': if (last_structural() != ']') { log_value("starting bracket unmatched"); return TAPE_ERROR; }; break; + } + } + + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_root_primitive(*this, value) ); break; + } + } + goto document_end; + +// +// Object parser states +// +object_begin: + log_start_value("object"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = false; + SIMDJSON_TRY( visitor.visit_object_start(*this) ); + + { + auto key = advance(); + if (*key != '"') { log_error("Object does not start with a key"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.increment_count(*this) ); + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + +object_field: + if (simdjson_unlikely( *advance() != ':' )) { log_error("Missing colon after key in object"); return TAPE_ERROR; } + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +object_continue: + switch (*advance()) { + case ',': + SIMDJSON_TRY( visitor.increment_count(*this) ); + { + auto key = advance(); + if (simdjson_unlikely( *key != '"' )) { log_error("Key string missing at beginning of field in object"); return TAPE_ERROR; } + SIMDJSON_TRY( visitor.visit_key(*this, key) ); + } + goto object_field; + case '}': log_end_value("object"); SIMDJSON_TRY( visitor.visit_object_end(*this) ); goto scope_end; + default: log_error("No comma between object fields"); return TAPE_ERROR; + } + +scope_end: + depth--; + if (depth == 0) { goto document_end; } + if (dom_parser.is_array[depth]) { goto array_continue; } + goto object_continue; + +// +// Array parser states +// +array_begin: + log_start_value("array"); + depth++; + if (depth >= dom_parser.max_depth()) { log_error("Exceeded max depth!"); return DEPTH_ERROR; } + dom_parser.is_array[depth] = true; + SIMDJSON_TRY( visitor.visit_array_start(*this) ); + SIMDJSON_TRY( visitor.increment_count(*this) ); + +array_value: + { + auto value = advance(); + switch (*value) { + case '{': if (*peek() == '}') { advance(); log_value("empty object"); SIMDJSON_TRY( visitor.visit_empty_object(*this) ); break; } goto object_begin; + case '[': if (*peek() == ']') { advance(); log_value("empty array"); SIMDJSON_TRY( visitor.visit_empty_array(*this) ); break; } goto array_begin; + default: SIMDJSON_TRY( visitor.visit_primitive(*this, value) ); break; + } + } + +array_continue: + switch (*advance()) { + case ',': SIMDJSON_TRY( visitor.increment_count(*this) ); goto array_value; + case ']': log_end_value("array"); SIMDJSON_TRY( visitor.visit_array_end(*this) ); goto scope_end; + default: log_error("Missing comma between array values"); return TAPE_ERROR; + 
} + +document_end: + log_end_value("document"); + SIMDJSON_TRY( visitor.visit_document_end(*this) ); + + dom_parser.next_structural_index = uint32_t(next_structural - &dom_parser.structural_indexes[0]); + + // If we didn't make it to the end, it's an error + if ( !STREAMING && dom_parser.next_structural_index != dom_parser.n_structural_indexes ) { + log_error("More than one JSON value at the root of the document, or extra characters at the end of the JSON!"); + return TAPE_ERROR; + } + + return SUCCESS; + +} // walk_document() + +simdjson_inline json_iterator::json_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { +} + +simdjson_inline const uint8_t *json_iterator::peek() const noexcept { + return &buf[*(next_structural)]; +} +simdjson_inline const uint8_t *json_iterator::advance() noexcept { + return &buf[*(next_structural++)]; +} +simdjson_inline size_t json_iterator::remaining_len() const noexcept { + return dom_parser.len - *(next_structural-1); +} + +simdjson_inline bool json_iterator::at_eof() const noexcept { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; +} +simdjson_inline bool json_iterator::at_beginning() const noexcept { + return next_structural == dom_parser.structural_indexes.get(); +} +simdjson_inline uint8_t json_iterator::last_structural() const noexcept { + return buf[dom_parser.structural_indexes[dom_parser.n_structural_indexes - 1]]; +} + +simdjson_inline void json_iterator::log_value(const char *type) const noexcept { + logger::log_line(*this, "", type, ""); +} + +simdjson_inline void json_iterator::log_start_value(const char *type) const noexcept { + logger::log_line(*this, "+", type, ""); + if (logger::LOG_ENABLED) { logger::log_depth++; } +} + +simdjson_inline void json_iterator::log_end_value(const char *type) const noexcept { + if (logger::LOG_ENABLED) { logger::log_depth--; } + logger::log_line(*this, "-", type, ""); +} + +simdjson_inline void json_iterator::log_error(const char *error) const noexcept { + logger::log_line(*this, "", "ERROR", error); +} + +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_root_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_root_string(*this, value); + case 't': return visitor.visit_root_true_atom(*this, value); + case 'f': return visitor.visit_root_false_atom(*this, value); + case 'n': return visitor.visit_root_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_root_number(*this, value); + default: + log_error("Document starts with a non-value character"); + return TAPE_ERROR; + } +} +template +simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V &visitor, const uint8_t *value) noexcept { + switch (*value) { + case '"': return visitor.visit_string(*this, value); + case 't': return visitor.visit_true_atom(*this, value); + case 'f': return visitor.visit_false_atom(*this, value); + case 'n': return visitor.visit_null_atom(*this, value); + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return visitor.visit_number(*this, value); + default: + log_error("Non-value found when value was expected!"); + return TAPE_ERROR; + } +} + +} // 
namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_JSON_ITERATOR_H +/* end file generic/stage2/json_iterator.h for westmere */ +/* including generic/stage2/stringparsing.h for westmere: #include */ +/* begin file generic/stage2/stringparsing.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This file contains the common code every implementation uses +// It is intended to be included multiple times and compiled multiple times + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace stringparsing { + +// begin copypasta +// These chars yield themselves: " \ / +// b -> backspace, f -> formfeed, n -> newline, r -> cr, t -> horizontal tab +// u not handled in this table as it's complex +static const uint8_t escape_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x0. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4. + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0, // 0x5. + 0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6. + 0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7. + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +// handle a unicode codepoint +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint(const uint8_t **src_ptr, + uint8_t **dst_ptr, bool allow_replacement) { + // Use the default Unicode Character 'REPLACEMENT CHARACTER' (U+FFFD) + constexpr uint32_t substitution_code_point = 0xfffd; + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. 
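+  // Worked example of the surrogate-pair arithmetic below (illustrative values, not
+  // upstream code): for the escape sequence "\uD83D\uDE00" (U+1F600),
+  //   high = 0xD83D, low = 0xDE00
+  //   code_point = ((0xD83D - 0xD800) << 10 | (0xDE00 - 0xDC00)) + 0x10000
+  //              = (0x3D << 10 | 0x200) + 0x10000 = 0x1F600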
+ if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) != ((static_cast ('\\') << 8) | static_cast ('u'))) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + + // We have already checked that the high surrogate is valid and + // (code_point - 0xd800) < 1024. + // + // Check that code_point_2 is in the range 0xdc00..0xdfff + // and that code_point_2 was parsed from valid hex. + uint32_t low_bit = code_point_2 - 0xdc00; + if (low_bit >> 10) { + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } else { + code_point = (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + + } + } else if (code_point >= 0xdc00 && code_point <= 0xdfff) { + // If we encounter a low surrogate (not preceded by a high surrogate) + // then we have an error. + if(!allow_replacement) { return false; } + code_point = substitution_code_point; + } + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +// handle a unicode codepoint using the wobbly convention +// https://simonsapin.github.io/wtf-8/ +// write appropriate values into dest +// src will advance 6 bytes or 12 bytes +// dest will advance a variable amount (return via pointer) +// return true if the unicode codepoint was valid +// We work in little-endian then swap at write time +simdjson_warn_unused +simdjson_inline bool handle_unicode_codepoint_wobbly(const uint8_t **src_ptr, + uint8_t **dst_ptr) { + // It is not ideal that this function is nearly identical to handle_unicode_codepoint. + // + // jsoncharutils::hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the + // conversion isn't valid; we defer the check for this to inside the + // multilingual plane check + uint32_t code_point = jsoncharutils::hex_to_u32_nocheck(*src_ptr + 2); + *src_ptr += 6; + // If we found a high surrogate, we must + // check for low surrogate for characters + // outside the Basic + // Multilingual Plane. + if (code_point >= 0xd800 && code_point < 0xdc00) { + const uint8_t *src_data = *src_ptr; + /* Compiler optimizations convert this to a single 16-bit load and compare on most platforms */ + if (((src_data[0] << 8) | src_data[1]) == ((static_cast ('\\') << 8) | static_cast ('u'))) { + uint32_t code_point_2 = jsoncharutils::hex_to_u32_nocheck(src_data + 2); + uint32_t low_bit = code_point_2 - 0xdc00; + if ((low_bit >> 10) == 0) { + code_point = + (((code_point - 0xd800) << 10) | low_bit) + 0x10000; + *src_ptr += 6; + } + } + } + + size_t offset = jsoncharutils::codepoint_to_utf8(code_point, *dst_ptr); + *dst_ptr += offset; + return offset > 0; +} + + +/** + * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There + * must be an unescaped quote terminating the string. It returns the final output + * position as pointer. In case of error (e.g., the string has bad escaped codes), + * then null_nullptrptr is returned. It is assumed that the output buffer is large + * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes + + * SIMDJSON_PADDING bytes. 
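+ * (Usage note, illustrative: in the tape-building path below, dst points into the parser's
+ * string_buf just past a 4-byte length slot reserved by tape_builder::on_start_string();
+ * tape_builder::on_end_string() later records that length and NUL-terminates.)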
+ */ +simdjson_warn_unused simdjson_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) { + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint(&src, &dst, allow_replacement)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. */ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) { + // It is not ideal that this function is nearly identical to parse_string. + while (1) { + // Copy the next n bytes, and find the backslash and quote in them. + auto bs_quote = backslash_and_quote::copy_and_find(src, dst); + // If the next thing is the end quote, copy and return + if (bs_quote.has_quote_first()) { + // we encountered quotes first. Move dst to point to quotes and exit + return dst + bs_quote.quote_index(); + } + if (bs_quote.has_backslash()) { + /* find out where the backspace is */ + auto bs_dist = bs_quote.backslash_index(); + uint8_t escape_char = src[bs_dist + 1]; + /* we encountered backslash first. Handle backslash */ + if (escape_char == 'u') { + /* move src/dst up to the start; they will be further adjusted + within the unicode codepoint handling code. */ + src += bs_dist; + dst += bs_dist; + if (!handle_unicode_codepoint_wobbly(&src, &dst)) { + return nullptr; + } + } else { + /* simple 1:1 conversion. Will eat bs_dist+2 characters in input and + * write bs_dist+1 characters to output + * note this may reach beyond the part of the buffer we've actually + * seen. I think this is ok */ + uint8_t escape_result = escape_map[escape_char]; + if (escape_result == 0u) { + return nullptr; /* bogus escape value is an error */ + } + dst[bs_dist] = escape_result; + src += bs_dist + 2; + dst += bs_dist + 1; + } + } else { + /* they are the same. Since they can't co-occur, it means we + * encountered neither. 
*/ + src += backslash_and_quote::BYTES_PROCESSED; + dst += backslash_and_quote::BYTES_PROCESSED; + } + } + /* can't be reached */ + return nullptr; +} + +} // namespace stringparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H +/* end file generic/stage2/stringparsing.h for westmere */ +/* including generic/stage2/structural_iterator.h for westmere: #include */ +/* begin file generic/stage2/structural_iterator.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace stage2 { + +class structural_iterator { +public: + const uint8_t* const buf; + uint32_t *next_structural; + dom_parser_implementation &dom_parser; + + // Start a structural + simdjson_inline structural_iterator(dom_parser_implementation &_dom_parser, size_t start_structural_index) + : buf{_dom_parser.buf}, + next_structural{&_dom_parser.structural_indexes[start_structural_index]}, + dom_parser{_dom_parser} { + } + // Get the buffer position of the current structural character + simdjson_inline const uint8_t* current() { + return &buf[*(next_structural-1)]; + } + // Get the current structural character + simdjson_inline char current_char() { + return buf[*(next_structural-1)]; + } + // Get the next structural character without advancing + simdjson_inline char peek_next_char() { + return buf[*next_structural]; + } + simdjson_inline const uint8_t* peek() { + return &buf[*next_structural]; + } + simdjson_inline const uint8_t* advance() { + return &buf[*(next_structural++)]; + } + simdjson_inline char advance_char() { + return buf[*(next_structural++)]; + } + simdjson_inline size_t remaining_len() { + return dom_parser.len - *(next_structural-1); + } + + simdjson_inline bool at_end() { + return next_structural == &dom_parser.structural_indexes[dom_parser.n_structural_indexes]; + } + simdjson_inline bool at_beginning() { + return next_structural == dom_parser.structural_indexes.get(); + } +}; + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_STRUCTURAL_ITERATOR_H +/* end file generic/stage2/structural_iterator.h for westmere */ +/* including generic/stage2/tape_builder.h for westmere: #include */ +/* begin file generic/stage2/tape_builder.h for westmere */ +#ifndef SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #include */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace 
westmere { +namespace { +namespace stage2 { + +struct tape_builder { + template + simdjson_warn_unused static simdjson_inline error_code parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept; + + /** Called when a non-empty document starts. */ + simdjson_warn_unused simdjson_inline error_code visit_document_start(json_iterator &iter) noexcept; + /** Called when a non-empty document ends without error. */ + simdjson_warn_unused simdjson_inline error_code visit_document_end(json_iterator &iter) noexcept; + + /** Called when a non-empty array starts. */ + simdjson_warn_unused simdjson_inline error_code visit_array_start(json_iterator &iter) noexcept; + /** Called when a non-empty array ends. */ + simdjson_warn_unused simdjson_inline error_code visit_array_end(json_iterator &iter) noexcept; + /** Called when an empty array is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_array(json_iterator &iter) noexcept; + + /** Called when a non-empty object starts. */ + simdjson_warn_unused simdjson_inline error_code visit_object_start(json_iterator &iter) noexcept; + /** + * Called when a key in a field is encountered. + * + * primitive, visit_object_start, visit_empty_object, visit_array_start, or visit_empty_array + * will be called after this with the field value. + */ + simdjson_warn_unused simdjson_inline error_code visit_key(json_iterator &iter, const uint8_t *key) noexcept; + /** Called when a non-empty object ends. */ + simdjson_warn_unused simdjson_inline error_code visit_object_end(json_iterator &iter) noexcept; + /** Called when an empty object is found. */ + simdjson_warn_unused simdjson_inline error_code visit_empty_object(json_iterator &iter) noexcept; + + /** + * Called when a string, number, boolean or null is found. + */ + simdjson_warn_unused simdjson_inline error_code visit_primitive(json_iterator &iter, const uint8_t *value) noexcept; + /** + * Called when a string, number, boolean or null is found at the top level of a document (i.e. + * when there is no array or object and the entire document is a single string, number, boolean or + * null. + * + * This is separate from primitive() because simdjson's normal primitive parsing routines assume + * there is at least one more token after the value, which is only true in an array or object. 
+ */ + simdjson_warn_unused simdjson_inline error_code visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_string(json_iterator &iter, const uint8_t *value, bool key = false) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + simdjson_warn_unused simdjson_inline error_code visit_root_string(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_number(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept; + simdjson_warn_unused simdjson_inline error_code visit_root_null_atom(json_iterator &iter, const uint8_t *value) noexcept; + + /** Called each time a new field or element in an array or object is found. */ + simdjson_warn_unused simdjson_inline error_code increment_count(json_iterator &iter) noexcept; + + /** Next location to write to tape */ + tape_writer tape; +private: + /** Next write location in the string buf for stage 2 parsing */ + uint8_t *current_string_buf_loc; + + simdjson_inline tape_builder(dom::document &doc) noexcept; + + simdjson_inline uint32_t next_tape_index(json_iterator &iter) const noexcept; + simdjson_inline void start_container(json_iterator &iter) noexcept; + simdjson_warn_unused simdjson_inline error_code end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_warn_unused simdjson_inline error_code empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept; + simdjson_inline uint8_t *on_start_string(json_iterator &iter) noexcept; + simdjson_inline void on_end_string(uint8_t *dst) noexcept; +}; // struct tape_builder + +template +simdjson_warn_unused simdjson_inline error_code tape_builder::parse_document( + dom_parser_implementation &dom_parser, + dom::document &doc) noexcept { + dom_parser.doc = &doc; + json_iterator iter(dom_parser, STREAMING ? 
dom_parser.next_structural_index : 0); + tape_builder builder(doc); + return iter.walk_document(builder); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_root_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_primitive(json_iterator &iter, const uint8_t *value) noexcept { + return iter.visit_primitive(*this, value); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_object(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_empty_array(json_iterator &iter) noexcept { + return empty_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_start(json_iterator &iter) noexcept { + start_container(iter); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_object_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_OBJECT, internal::tape_type::END_OBJECT); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_array_end(json_iterator &iter) noexcept { + return end_container(iter, internal::tape_type::START_ARRAY, internal::tape_type::END_ARRAY); +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_document_end(json_iterator &iter) noexcept { + constexpr uint32_t start_tape_index = 0; + tape.append(start_tape_index, internal::tape_type::ROOT); + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter), internal::tape_type::ROOT); + return SUCCESS; +} +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_key(json_iterator &iter, const uint8_t *key) noexcept { + return visit_string(iter, key, true); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::increment_count(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].count++; // we have a key value pair in the object at parser.dom_parser.depth - 1 + return SUCCESS; +} + +simdjson_inline tape_builder::tape_builder(dom::document &doc) noexcept : tape{doc.tape.get()}, current_string_buf_loc{doc.string_buf.get()} {} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_string(json_iterator &iter, const uint8_t *value, bool key) noexcept { + iter.log_value(key ? "key" : "string"); + uint8_t *dst = on_start_string(iter); + dst = stringparsing::parse_string(value+1, dst, false); // We do not allow replacement when the escape characters are invalid. 
+ if (dst == nullptr) { + iter.log_error("Invalid escape in string"); + return STRING_ERROR; + } + on_end_string(dst); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_string(json_iterator &iter, const uint8_t *value) noexcept { + return visit_string(iter, value); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_number(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("number"); + return numberparsing::parse_number(value, tape); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_number(json_iterator &iter, const uint8_t *value) noexcept { + // + // We need to make a copy to make sure that the string is space terminated. + // This is not about padding the input, which should already padded up + // to len + SIMDJSON_PADDING. However, we have no control at this stage + // on how the padding was done. What if the input string was padded with nulls? + // It is quite common for an input string to have an extra null character (C string). + // We do not want to allow 9\0 (where \0 is the null character) inside a JSON + // document, but the string "9\0" by itself is fine. So we make a copy and + // pad the input with spaces when we know that there is just one input element. + // This copy is relatively expensive, but it will almost never be called in + // practice unless you are in the strange scenario where you have many JSON + // documents made of single atoms. + // + std::unique_ptrcopy(new (std::nothrow) uint8_t[iter.remaining_len() + SIMDJSON_PADDING]); + if (copy.get() == nullptr) { return MEMALLOC; } + std::memcpy(copy.get(), value, iter.remaining_len()); + std::memset(copy.get() + iter.remaining_len(), ' ', SIMDJSON_PADDING); + error_code error = visit_number(iter, copy.get()); + return error; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value)) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_true_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("true"); + if (!atomparsing::is_valid_true_atom(value, iter.remaining_len())) { return T_ATOM_ERROR; } + tape.append(0, internal::tape_type::TRUE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value)) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_false_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("false"); + if (!atomparsing::is_valid_false_atom(value, iter.remaining_len())) { return F_ATOM_ERROR; } + tape.append(0, internal::tape_type::FALSE_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_null_atom(json_iterator &iter, const uint8_t *value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value)) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::visit_root_null_atom(json_iterator &iter, const uint8_t 
*value) noexcept { + iter.log_value("null"); + if (!atomparsing::is_valid_null_atom(value, iter.remaining_len())) { return N_ATOM_ERROR; } + tape.append(0, internal::tape_type::NULL_VALUE); + return SUCCESS; +} + +// private: + +simdjson_inline uint32_t tape_builder::next_tape_index(json_iterator &iter) const noexcept { + return uint32_t(tape.next_tape_loc - iter.dom_parser.doc->tape.get()); +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::empty_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + auto start_index = next_tape_index(iter); + tape.append(start_index+2, start); + tape.append(start_index, end); + return SUCCESS; +} + +simdjson_inline void tape_builder::start_container(json_iterator &iter) noexcept { + iter.dom_parser.open_containers[iter.depth].tape_index = next_tape_index(iter); + iter.dom_parser.open_containers[iter.depth].count = 0; + tape.skip(); // We don't actually *write* the start element until the end. +} + +simdjson_warn_unused simdjson_inline error_code tape_builder::end_container(json_iterator &iter, internal::tape_type start, internal::tape_type end) noexcept { + // Write the ending tape element, pointing at the start location + const uint32_t start_tape_index = iter.dom_parser.open_containers[iter.depth].tape_index; + tape.append(start_tape_index, end); + // Write the start tape element, pointing at the end location (and including count) + // count can overflow if it exceeds 24 bits... so we saturate + // the convention being that a cnt of 0xffffff or more is undetermined in value (>= 0xffffff). + const uint32_t count = iter.dom_parser.open_containers[iter.depth].count; + const uint32_t cntsat = count > 0xFFFFFF ? 0xFFFFFF : count; + tape_writer::write(iter.dom_parser.doc->tape[start_tape_index], next_tape_index(iter) | (uint64_t(cntsat) << 32), start); + return SUCCESS; +} + +simdjson_inline uint8_t *tape_builder::on_start_string(json_iterator &iter) noexcept { + // we advance the point, accounting for the fact that we have a NULL termination + tape.append(current_string_buf_loc - iter.dom_parser.doc->string_buf.get(), internal::tape_type::STRING); + return current_string_buf_loc + sizeof(uint32_t); +} + +simdjson_inline void tape_builder::on_end_string(uint8_t *dst) noexcept { + uint32_t str_length = uint32_t(dst - (current_string_buf_loc + sizeof(uint32_t))); + // TODO check for overflow in case someone has a crazy string (>=4GB?) + // But only add the overflow check when the document itself exceeds 4GB + // Currently unneeded because we refuse to parse docs larger or equal to 4GB. + memcpy(current_string_buf_loc, &str_length, sizeof(uint32_t)); + // NULL termination is still handy if you expect all your strings to + // be NULL terminated? 
It comes at a small cost + *dst = 0; + current_string_buf_loc = dst + 1; +} + +} // namespace stage2 +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_SRC_GENERIC_STAGE2_TAPE_BUILDER_H +/* end file generic/stage2/tape_builder.h for westmere */ +/* end file generic/stage2/amalgamated.h for westmere */ + +// +// Stage 1 +// + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr& dst +) const noexcept { + dst.reset( new (std::nothrow) dom_parser_implementation() ); + if (!dst) { return MEMALLOC; } + if (auto err = dst->set_capacity(capacity)) + return err; + if (auto err = dst->set_max_depth(max_depth)) + return err; + return SUCCESS; +} + +namespace { + +using namespace simd; + +simdjson_inline json_character_block json_character_block::classify(const simd::simd8x64& in) { + // These lookups rely on the fact that anything < 127 will match the lower 4 bits, which is why + // we can't use the generic lookup_16. + auto whitespace_table = simd8::repeat_16(' ', 100, 100, 100, 17, 100, 113, 2, 100, '\t', '\n', 112, 100, '\r', 100, 100); + + // The 6 operators (:,[]{}) have these values: + // + // , 2C + // : 3A + // [ 5B + // { 7B + // ] 5D + // } 7D + // + // If you use | 0x20 to turn [ and ] into { and }, the lower 4 bits of each character is unique. + // We exploit this, using a simd 4-bit lookup to tell us which character match against, and then + // match it (against | 0x20). + // + // To prevent recognizing other characters, everything else gets compared with 0, which cannot + // match due to the | 0x20. + // + // NOTE: Due to the | 0x20, this ALSO treats and (control characters 0C and 1A) like , + // and :. This gets caught in stage 2, which checks the actual character to ensure the right + // operators are in the right places. + const auto op_table = simd8::repeat_16( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, ':', '{', // : = 3A, [ = 5B, { = 7B + ',', '}', 0, 0 // , = 2C, ] = 5D, } = 7D + ); + + // We compute whitespace and op separately. If the code later only use one or the + // other, given the fact that all functions are aggressively inlined, we can + // hope that useless computations will be omitted. This is namely case when + // minifying (we only need whitespace). 
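+  // Worked example of the nibble-lookup trick above (illustrative, not upstream code):
+  //   ',' is 0x2C: low nibble 0xC indexes op_table -> ',' (0x2C); 0x2C | 0x20 == 0x2C, so it matches.
+  //   '[' is 0x5B: low nibble 0xB indexes op_table -> '{' (0x7B); 0x5B | 0x20 == 0x7B, so it matches.
+  //   'a' is 0x61: low nibble 0x1 indexes op_table -> 0;         0x61 | 0x20 == 0x61 != 0, no match.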
+ + + const uint64_t whitespace = in.eq({ + _mm_shuffle_epi8(whitespace_table, in.chunks[0]), + _mm_shuffle_epi8(whitespace_table, in.chunks[1]), + _mm_shuffle_epi8(whitespace_table, in.chunks[2]), + _mm_shuffle_epi8(whitespace_table, in.chunks[3]) + }); + // Turn [ and ] into { and } + const simd8x64 curlified{ + in.chunks[0] | 0x20, + in.chunks[1] | 0x20, + in.chunks[2] | 0x20, + in.chunks[3] | 0x20 + }; + const uint64_t op = curlified.eq({ + _mm_shuffle_epi8(op_table, in.chunks[0]), + _mm_shuffle_epi8(op_table, in.chunks[1]), + _mm_shuffle_epi8(op_table, in.chunks[2]), + _mm_shuffle_epi8(op_table, in.chunks[3]) + }); + return { whitespace, op }; +} + +simdjson_inline bool is_ascii(const simd8x64& input) { + return input.reduce_or().is_ascii(); +} + +simdjson_unused simdjson_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) { + simd8 is_second_byte = prev1.saturating_sub(0xc0u-1); // Only 11______ will be > 0 + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_second_byte | is_third_byte | is_fourth_byte) > int8_t(0); +} + +simdjson_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) { + simd8 is_third_byte = prev2.saturating_sub(0xe0u-1); // Only 111_____ will be > 0 + simd8 is_fourth_byte = prev3.saturating_sub(0xf0u-1); // Only 1111____ will be > 0 + // Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine. + return simd8(is_third_byte | is_fourth_byte) > int8_t(0); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +// +// Stage 2 +// + +// +// Implementation-specific overrides +// + +namespace simdjson { +namespace westmere { + +simdjson_warn_unused error_code implementation::minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept { + return westmere::stage1::json_minifier::minify<64>(buf, len, dst, dst_len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage1(const uint8_t *_buf, size_t _len, stage1_mode streaming) noexcept { + this->buf = _buf; + this->len = _len; + return westmere::stage1::json_structural_indexer::index<64>(_buf, _len, *this, streaming); +} + +simdjson_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept { + return westmere::stage1::generic_validate_utf8(buf,len); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { + return stage2::tape_builder::parse_document(*this, _doc); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_string(const uint8_t *src, uint8_t *dst, bool replacement_char) const noexcept { + return westmere::stringparsing::parse_string(src, dst, replacement_char); +} + +simdjson_warn_unused uint8_t *dom_parser_implementation::parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept { + return westmere::stringparsing::parse_wobbly_string(src, dst); +} + +simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t *_buf, size_t _len, dom::document &_doc) noexcept { + auto error = 
stage1(_buf, _len, stage1_mode::regular); + if (error) { return error; } + return stage2(_doc); +} + +} // namespace westmere +} // namespace simdjson + +/* including simdjson/westmere/end.h: #include */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_SRC_WESTMERE_CPP +/* end file westmere.cpp */ +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +/* end file simdjson.cpp */ diff --git a/src/3rd_party/simdjson/simdjson.h b/src/3rd_party/simdjson/simdjson.h new file mode 100644 index 00000000..18795965 --- /dev/null +++ b/src/3rd_party/simdjson/simdjson.h @@ -0,0 +1,87562 @@ +/* auto-generated on 2023-08-18 14:37:10 -0400. Do not edit! */ +/* including simdjson.h: */ +/* begin file simdjson.h */ +#ifndef SIMDJSON_H +#define SIMDJSON_H + +/** + * @mainpage + * + * Check the [README.md](https://github.com/simdjson/simdjson/blob/master/README.md#simdjson--parsing-gigabytes-of-json-per-second). + * + * Sample code. See https://github.com/simdjson/simdjson/blob/master/doc/basics.md for more examples. + + #include "simdjson.h" + + int main(void) { + // load from `twitter.json` file: + simdjson::dom::parser parser; + simdjson::dom::element tweets = parser.load("twitter.json"); + std::cout << tweets["search_metadata"]["count"] << " results." << std::endl; + + // Parse and iterate through an array of objects + auto abstract_json = R"( [ + { "12345" : {"a":12.34, "b":56.78, "c": 9998877} }, + { "12545" : {"a":11.44, "b":12.78, "c": 11111111} } + ] )"_padded; + + for (simdjson::dom::object obj : parser.parse(abstract_json)) { + for(const auto key_value : obj) { + cout << "key: " << key_value.key << " : "; + simdjson::dom::object innerobj = key_value.value; + cout << "a: " << double(innerobj["a"]) << ", "; + cout << "b: " << double(innerobj["b"]) << ", "; + cout << "c: " << int64_t(innerobj["c"]) << endl; + } + } + } + */ + +/* including simdjson/common_defs.h: #include "simdjson/common_defs.h" */ +/* begin file simdjson/common_defs.h */ +#ifndef SIMDJSON_COMMON_DEFS_H +#define SIMDJSON_COMMON_DEFS_H + +#include +/* including simdjson/compiler_check.h: #include "simdjson/compiler_check.h" */ +/* begin file simdjson/compiler_check.h */ +#ifndef SIMDJSON_COMPILER_CHECK_H +#define SIMDJSON_COMPILER_CHECK_H + +#ifndef __cplusplus +#error simdjson requires a C++ compiler +#endif + +#ifndef SIMDJSON_CPLUSPLUS +#if defined(_MSVC_LANG) && !defined(__clang__) +#define SIMDJSON_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG) +#else +#define SIMDJSON_CPLUSPLUS __cplusplus +#endif +#endif + +// C++ 17 +#if !defined(SIMDJSON_CPLUSPLUS17) && (SIMDJSON_CPLUSPLUS >= 201703L) +#define SIMDJSON_CPLUSPLUS17 1 +#endif + +// C++ 14 +#if !defined(SIMDJSON_CPLUSPLUS14) && (SIMDJSON_CPLUSPLUS >= 201402L) +#define SIMDJSON_CPLUSPLUS14 1 +#endif + +// C++ 11 +#if !defined(SIMDJSON_CPLUSPLUS11) && (SIMDJSON_CPLUSPLUS >= 201103L) +#define SIMDJSON_CPLUSPLUS11 1 +#endif + +#ifndef SIMDJSON_CPLUSPLUS11 +#error simdjson requires a compiler compliant with the C++11 standard +#endif + +#endif // SIMDJSON_COMPILER_CHECK_H +/* end file simdjson/compiler_check.h */ +/* including simdjson/portability.h: #include "simdjson/portability.h" */ +/* begin file simdjson/portability.h */ +#ifndef SIMDJSON_PORTABILITY_H +#define SIMDJSON_PORTABILITY_H + +#include +#include +#include +#include +#include +#ifndef _WIN32 +// strcasecmp, strncasecmp +#include +#endif + +#ifdef _MSC_VER +#define SIMDJSON_VISUAL_STUDIO 1 +/** + * We want to differentiate carefully between + * clang under visual studio and regular visual + * studio. + * + * Under clang for Windows, we enable: + * * target pragmas so that part and only part of the + * code gets compiled for advanced instructions. + * + */ +#ifdef __clang__ +// clang under visual studio +#define SIMDJSON_CLANG_VISUAL_STUDIO 1 +#else +// just regular visual studio (best guess) +#define SIMDJSON_REGULAR_VISUAL_STUDIO 1 +#endif // __clang__ +#endif // _MSC_VER + +#if defined(__x86_64__) || defined(_M_AMD64) +#define SIMDJSON_IS_X86_64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define SIMDJSON_IS_ARM64 1 +#elif defined(__PPC64__) || defined(_M_PPC64) +#if defined(__ALTIVEC__) +#define SIMDJSON_IS_PPC64_VMX 1 +#endif // defined(__ALTIVEC__) +#else +#define SIMDJSON_IS_32BITS 1 + +#if defined(_M_IX86) || defined(__i386__) +#define SIMDJSON_IS_X86_32BITS 1 +#elif defined(__arm__) || defined(_M_ARM) +#define SIMDJSON_IS_ARM_32BITS 1 +#elif defined(__PPC__) || defined(_M_PPC) +#define SIMDJSON_IS_PPC_32BITS 1 +#endif + +#endif // defined(__x86_64__) || defined(_M_AMD64) +#ifndef SIMDJSON_IS_32BITS +#define SIMDJSON_IS_32BITS 0 +#endif + +#if SIMDJSON_IS_32BITS +#ifndef SIMDJSON_NO_PORTABILITY_WARNING +#pragma message("The simdjson library is designed \ +for 64-bit processors and it seems that you are not \ +compiling for a known 64-bit platform. All fast kernels \ +will be disabled and performance may be poor. Please \ +use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.") +#endif // SIMDJSON_NO_PORTABILITY_WARNING +#endif // SIMDJSON_IS_32BITS + +#define SIMDJSON_CAT_IMPLEMENTATION_(a,...) a ## __VA_ARGS__ +#define SIMDJSON_CAT(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a,...) #a SIMDJSON_STRINGIFY(__VA_ARGS__) +#define SIMDJSON_STRINGIFY(a,...) SIMDJSON_CAT_IMPLEMENTATION_(a, __VA_ARGS__) + +// this is almost standard? +#undef SIMDJSON_STRINGIFY_IMPLEMENTATION_ +#undef SIMDJSON_STRINGIFY +#define SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) #a +#define SIMDJSON_STRINGIFY(a) SIMDJSON_STRINGIFY_IMPLEMENTATION_(a) + +// Our fast kernels require 64-bit systems. +// +// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions. +// Furthermore, the number of SIMD registers is reduced. +// +// On 32-bit ARM, we would have smaller registers. +// +// The simdjson users should still have the fallback kernel. It is +// slower, but it should run everywhere. 
+ +// +// Enable valid runtime implementations, and select SIMDJSON_BUILTIN_IMPLEMENTATION +// + +// We are going to use runtime dispatch. +#if SIMDJSON_IS_X86_64 +#ifdef __clang__ +// clang does not have GCC push pop +// warning: clang attribute push can't be used within a namespace in clang up +// til 8.0 so SIMDJSON_TARGET_REGION and SIMDJSON_UNTARGET_REGION must be *outside* of a +// namespace. +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma(SIMDJSON_STRINGIFY( \ + clang attribute push(__attribute__((target(T))), apply_to = function))) +#define SIMDJSON_UNTARGET_REGION _Pragma("clang attribute pop") +#elif defined(__GNUC__) +// GCC is easier +#define SIMDJSON_TARGET_REGION(T) \ + _Pragma("GCC push_options") _Pragma(SIMDJSON_STRINGIFY(GCC target(T))) +#define SIMDJSON_UNTARGET_REGION _Pragma("GCC pop_options") +#endif // clang then gcc + +#endif // x86 + +// Default target region macros don't do anything. +#ifndef SIMDJSON_TARGET_REGION +#define SIMDJSON_TARGET_REGION(T) +#define SIMDJSON_UNTARGET_REGION +#endif + +// Is threading enabled? +#if defined(_REENTRANT) || defined(_MT) +#ifndef SIMDJSON_THREADS_ENABLED +#define SIMDJSON_THREADS_ENABLED +#endif +#endif + +// workaround for large stack sizes under -O0. +// https://github.com/simdjson/simdjson/issues/691 +#ifdef __APPLE__ +#ifndef __OPTIMIZE__ +// Apple systems have small stack sizes in secondary threads. +// Lack of compiler optimization may generate high stack usage. +// Users may want to disable threads for safety, but only when +// in debug mode which we detect by the fact that the __OPTIMIZE__ +// macro is not defined. +#undef SIMDJSON_THREADS_ENABLED +#endif +#endif + + +#if defined(__clang__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined"))) +#elif defined(__GNUC__) +#define SIMDJSON_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined)) +#else +#define SIMDJSON_NO_SANITIZE_UNDEFINED +#endif + + +#if defined(__clang__) || defined(__GNUC__) +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) +#define SIMDJSON_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) +# endif // if __has_feature(memory_sanitizer) +#endif // defined(__has_feature) +#endif +// make sure it is defined as 'nothing' if it is unapplicable. +#ifndef SIMDJSON_NO_SANITIZE_MEMORY +#define SIMDJSON_NO_SANITIZE_MEMORY +#endif + +#if SIMDJSON_VISUAL_STUDIO +// This is one case where we do not distinguish between +// regular visual studio and clang under visual studio. +// clang under Windows has _stricmp (like visual studio) but not strcasecmp (as clang normally has) +#define simdjson_strcasecmp _stricmp +#define simdjson_strncasecmp _strnicmp +#else +// The strcasecmp, strncasecmp, and strcasestr functions do not work with multibyte strings (e.g. UTF-8). +// So they are only useful for ASCII in our context. +// https://www.gnu.org/software/libunistring/manual/libunistring.html#char-_002a-strings +#define simdjson_strcasecmp strcasecmp +#define simdjson_strncasecmp strncasecmp +#endif + +#if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, +// then do away with asserts and use __assume. 
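+// Illustrative usage (assumed, not upstream code): SIMDJSON_ASSUME(cond) promises the
+// optimizer that cond holds in release builds, e.g. SIMDJSON_ASSUME(idx < capacity) can let
+// the compiler drop a bounds check, while SIMDJSON_UNREACHABLE() marks paths that cannot
+// execute (e.g. the default case of an exhaustive switch). In debug builds both fall back
+// to assert().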
+#if SIMDJSON_VISUAL_STUDIO +#define SIMDJSON_UNREACHABLE() __assume(0) +#define SIMDJSON_ASSUME(COND) __assume(COND) +#else +#define SIMDJSON_UNREACHABLE() __builtin_unreachable(); +#define SIMDJSON_ASSUME(COND) do { if (!(COND)) __builtin_unreachable(); } while (0) +#endif + +#else // defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) +// This should only ever be enabled in debug mode. +#define SIMDJSON_UNREACHABLE() assert(0); +#define SIMDJSON_ASSUME(COND) assert(COND) + +#endif + +#endif // SIMDJSON_PORTABILITY_H +/* end file simdjson/portability.h */ + +namespace simdjson { +namespace internal { +/** + * @private + * Our own implementation of the C++17 to_chars function. + * Defined in src/to_chars + */ +char *to_chars(char *first, const char *last, double value); +/** + * @private + * A number parsing routine. + * Defined in src/from_chars + */ +double from_chars(const char *first) noexcept; +double from_chars(const char *first, const char* end) noexcept; +} + +#ifndef SIMDJSON_EXCEPTIONS +#if __cpp_exceptions +#define SIMDJSON_EXCEPTIONS 1 +#else +#define SIMDJSON_EXCEPTIONS 0 +#endif +#endif + +} // namespace simdjson + +#if defined(__GNUC__) + // Marks a block with a name so that MCA analysis can see it. + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-BEGIN " #name); + #define SIMDJSON_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name); + #define SIMDJSON_DEBUG_BLOCK(name, block) BEGIN_DEBUG_BLOCK(name); block; END_DEBUG_BLOCK(name); +#else + #define SIMDJSON_BEGIN_DEBUG_BLOCK(name) + #define SIMDJSON_END_DEBUG_BLOCK(name) + #define SIMDJSON_DEBUG_BLOCK(name, block) +#endif + +// Align to N-byte boundary +#define SIMDJSON_ROUNDUP_N(a, n) (((a) + ((n)-1)) & ~((n)-1)) +#define SIMDJSON_ROUNDDOWN_N(a, n) ((a) & ~((n)-1)) + +#define SIMDJSON_ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0) + +#if SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline __forceinline + #define simdjson_never_inline __declspec(noinline) + + #define simdjson_unused + #define simdjson_warn_unused + + #ifndef simdjson_likely + #define simdjson_likely(x) x + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) x + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS __pragma(warning( push )) + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS __pragma(warning( push, 0 )) + #define SIMDJSON_DISABLE_VS_WARNING(WARNING_NUMBER) __pragma(warning( disable : WARNING_NUMBER )) + // Get rid of Intellisense-only warnings (Code Analysis) + // Though __has_include is C++17, it is supported in Visual Studio 2017 or better (_MSC_VER>=1910). 
+ #ifdef __has_include + #if __has_include() + #include + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS) + #endif + #endif + + #ifndef SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_VS_WARNING(4996) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING + #define SIMDJSON_POP_DISABLE_WARNINGS __pragma(warning( pop )) + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + + #define simdjson_really_inline inline __attribute__((always_inline)) + #define simdjson_never_inline inline __attribute__((noinline)) + + #define simdjson_unused __attribute__((unused)) + #define simdjson_warn_unused __attribute__((warn_unused_result)) + + #ifndef simdjson_likely + #define simdjson_likely(x) __builtin_expect(!!(x), 1) + #endif + #ifndef simdjson_unlikely + #define simdjson_unlikely(x) __builtin_expect(!!(x), 0) + #endif + + #define SIMDJSON_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push") + // gcc doesn't seem to disable all warnings with all and extra, add warnings here as necessary + // We do it separately for clang since it has different warnings. + #ifdef __clang__ + // clang is missing -Wmaybe-uninitialized. + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) + #else // __clang__ + #define SIMDJSON_PUSH_DISABLE_ALL_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Weffc++) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wall) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wconversion) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wextra) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wattributes) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wreturn-type) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wshadow) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-parameter) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused-variable) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wmaybe-uninitialized) \ + SIMDJSON_DISABLE_GCC_WARNING(-Wformat-security) + #endif // __clang__ + + #define SIMDJSON_PRAGMA(P) _Pragma(#P) + #define SIMDJSON_DISABLE_GCC_WARNING(WARNING) SIMDJSON_PRAGMA(GCC diagnostic ignored #WARNING) + #if SIMDJSON_CLANG_VISUAL_STUDIO + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS SIMDJSON_DISABLE_GCC_WARNING(-Wmicrosoft-include) + #else + #define SIMDJSON_DISABLE_UNDESIRED_WARNINGS + #endif + #define SIMDJSON_DISABLE_DEPRECATED_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wdeprecated-declarations) + #define SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING SIMDJSON_DISABLE_GCC_WARNING(-Wstrict-overflow) + #define SIMDJSON_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop") + + #define SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS SIMDJSON_PUSH_DISABLE_WARNINGS \ + SIMDJSON_DISABLE_GCC_WARNING(-Wunused) + #define SIMDJSON_POP_DISABLE_UNUSED_WARNINGS SIMDJSON_POP_DISABLE_WARNINGS + + + +#endif // MSC_VER + +#if 
defined(simdjson_inline) + // Prefer the user's definition of simdjson_inline; don't define it ourselves. +#elif defined(__GNUC__) && !defined(__OPTIMIZE__) + // If optimizations are disabled, forcing inlining can lead to significant + // code bloat and high compile times. Don't use simdjson_really_inline for + // unoptimized builds. + #define simdjson_inline inline +#else + // Force inlining for most simdjson functions. + #define simdjson_inline simdjson_really_inline +#endif + +#if SIMDJSON_VISUAL_STUDIO + /** + * Windows users need to do some extra work when building + * or using a dynamic library (DLL). When building, we need + * to set SIMDJSON_DLLIMPORTEXPORT to __declspec(dllexport). + * When *using* the DLL, the user needs to set + * SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport). + * + * Static libraries not need require such work. + * + * It does not matter here whether you are using + * the regular visual studio or clang under visual + * studio, you still need to handle these issues. + * + * Non-Windows systems do not have this complexity. + */ + #if SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY + // We set SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY when we build a DLL under Windows. + // It should never happen that both SIMDJSON_BUILDING_WINDOWS_DYNAMIC_LIBRARY and + // SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY are set. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllexport) + #elif SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY + // Windows user who call a dynamic library should set SIMDJSON_USING_WINDOWS_DYNAMIC_LIBRARY to 1. + #define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) + #else + // We assume by default static linkage + #define SIMDJSON_DLLIMPORTEXPORT + #endif + +/** + * Workaround for the vcpkg package manager. Only vcpkg should + * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. + */ +#if SIMDJSON_USING_LIBRARY +#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) +#endif +/** + * End of workaround for the vcpkg package manager. + */ +#else + #define SIMDJSON_DLLIMPORTEXPORT +#endif + +// C++17 requires string_view. +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_HAS_STRING_VIEW +#include // by the standard, this has to be safe. +#endif + +// This macro (__cpp_lib_string_view) has to be defined +// for C++17 and better, but if it is otherwise defined, +// we are going to assume that string_view is available +// even if we do not have C++17 support. +#ifdef __cpp_lib_string_view +#define SIMDJSON_HAS_STRING_VIEW +#endif + +// Some systems have string_view even if we do not have C++17 support, +// and even if __cpp_lib_string_view is undefined, it is the case +// with Apple clang version 11. +// We must handle it. *This is important.* +#ifndef SIMDJSON_HAS_STRING_VIEW +#if defined __has_include +// do not combine the next #if with the previous one (unsafe) +#if __has_include () +// now it is safe to trigger the include +#include // though the file is there, it does not follow that we got the implementation +#if defined(_LIBCPP_STRING_VIEW) +// Ah! So we under libc++ which under its Library Fundamentals Technical Specification, which preceded C++17, +// included string_view. +// This means that we have string_view *even though* we may not have C++17. +#define SIMDJSON_HAS_STRING_VIEW +#endif // _LIBCPP_STRING_VIEW +#endif // __has_include () +#endif // defined __has_include +#endif // def SIMDJSON_HAS_STRING_VIEW +// end of complicated but important routine to try to detect string_view. 
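+// Summary of the detection above: SIMDJSON_HAS_STRING_VIEW ends up defined when compiling as
+// C++17, when __cpp_lib_string_view is defined, or when libc++ provides string_view pre-C++17
+// (_LIBCPP_STRING_VIEW); in every other case the bundled string-view-lite below backfills
+// std::string_view.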
+ +// +// Backfill std::string_view using nonstd::string_view on systems where +// we expect that string_view is missing. Important: if we get this wrong, +// we will end up with two string_view definitions and potential trouble. +// That is why we work so hard above to avoid it. +// +#ifndef SIMDJSON_HAS_STRING_VIEW +SIMDJSON_PUSH_DISABLE_ALL_WARNINGS +/* including simdjson/nonstd/string_view.hpp: #include "simdjson/nonstd/string_view.hpp" */ +/* begin file simdjson/nonstd/string_view.hpp */ +// Copyright 2017-2020 by Martin Moene +// +// string-view lite, a C++17-like string_view for C++98 and later. +// For more information see https://github.com/martinmoene/string-view-lite +// +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#ifndef NONSTD_SV_LITE_H_INCLUDED +#define NONSTD_SV_LITE_H_INCLUDED + +#define string_view_lite_MAJOR 1 +#define string_view_lite_MINOR 7 +#define string_view_lite_PATCH 0 + +#define string_view_lite_VERSION nssv_STRINGIFY(string_view_lite_MAJOR) "." nssv_STRINGIFY(string_view_lite_MINOR) "." nssv_STRINGIFY(string_view_lite_PATCH) + +#define nssv_STRINGIFY( x ) nssv_STRINGIFY_( x ) +#define nssv_STRINGIFY_( x ) #x + +// string-view lite configuration: + +#define nssv_STRING_VIEW_DEFAULT 0 +#define nssv_STRING_VIEW_NONSTD 1 +#define nssv_STRING_VIEW_STD 2 + +// tweak header support: + +#ifdef __has_include +# if __has_include() +# include +# endif +#define nssv_HAVE_TWEAK_HEADER 1 +#else +#define nssv_HAVE_TWEAK_HEADER 0 +//# pragma message("string_view.hpp: Note: Tweak header not supported.") +#endif + +// string_view selection and configuration: + +#if !defined( nssv_CONFIG_SELECT_STRING_VIEW ) +# define nssv_CONFIG_SELECT_STRING_VIEW ( nssv_HAVE_STD_STRING_VIEW ? nssv_STRING_VIEW_STD : nssv_STRING_VIEW_NONSTD ) +#endif + +#ifndef nssv_CONFIG_STD_SV_OPERATOR +# define nssv_CONFIG_STD_SV_OPERATOR 0 +#endif + +#ifndef nssv_CONFIG_USR_SV_OPERATOR +# define nssv_CONFIG_USR_SV_OPERATOR 1 +#endif + +#ifdef nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS nssv_CONFIG_CONVERSION_STD_STRING +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS nssv_CONFIG_CONVERSION_STD_STRING +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +# define nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS 1 +#endif + +#ifndef nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +# define nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS 1 +#endif + +#ifndef nssv_CONFIG_NO_STREAM_INSERTION +# define nssv_CONFIG_NO_STREAM_INSERTION 0 +#endif + +// Control presence of exception handling (try and auto discover): + +#ifndef nssv_CONFIG_NO_EXCEPTIONS +# if defined(_MSC_VER) +# include // for _HAS_EXCEPTIONS +# endif +# if defined(__cpp_exceptions) || defined(__EXCEPTIONS) || (_HAS_EXCEPTIONS) +# define nssv_CONFIG_NO_EXCEPTIONS 0 +# else +# define nssv_CONFIG_NO_EXCEPTIONS 1 +# endif +#endif + +// C++ language version detection (C++23 is speculative): +// Note: VC14.0/1900 (VS2015) lacks too much from C++14. + +#ifndef nssv_CPLUSPLUS +# if defined(_MSVC_LANG ) && !defined(__clang__) +# define nssv_CPLUSPLUS (_MSC_VER == 1900 ? 
201103L : _MSVC_LANG ) +# else +# define nssv_CPLUSPLUS __cplusplus +# endif +#endif + +#define nssv_CPP98_OR_GREATER ( nssv_CPLUSPLUS >= 199711L ) +#define nssv_CPP11_OR_GREATER ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP11_OR_GREATER_ ( nssv_CPLUSPLUS >= 201103L ) +#define nssv_CPP14_OR_GREATER ( nssv_CPLUSPLUS >= 201402L ) +#define nssv_CPP17_OR_GREATER ( nssv_CPLUSPLUS >= 201703L ) +#define nssv_CPP20_OR_GREATER ( nssv_CPLUSPLUS >= 202002L ) +#define nssv_CPP23_OR_GREATER ( nssv_CPLUSPLUS >= 202300L ) + +// use C++17 std::string_view if available and requested: + +#if nssv_CPP17_OR_GREATER && defined(__has_include ) +# if __has_include( ) +# define nssv_HAVE_STD_STRING_VIEW 1 +# else +# define nssv_HAVE_STD_STRING_VIEW 0 +# endif +#else +# define nssv_HAVE_STD_STRING_VIEW 0 +#endif + +#define nssv_USES_STD_STRING_VIEW ( (nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_STD) || ((nssv_CONFIG_SELECT_STRING_VIEW == nssv_STRING_VIEW_DEFAULT) && nssv_HAVE_STD_STRING_VIEW) ) + +#define nssv_HAVE_STARTS_WITH ( nssv_CPP20_OR_GREATER || !nssv_USES_STD_STRING_VIEW ) +#define nssv_HAVE_ENDS_WITH nssv_HAVE_STARTS_WITH + +// +// Use C++17 std::string_view: +// + +#if nssv_USES_STD_STRING_VIEW + +#include + +// Extensions for std::string: + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( std::basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string_view +to_string_view( std::basic_string const & s ) +{ + return std::basic_string_view( s.data(), s.size() ); +} + +// Literal operators sv and _sv: + +#if nssv_CONFIG_STD_SV_OPERATOR + +using namespace std::literals::string_view_literals; + +#endif + +#if nssv_CONFIG_USR_SV_OPERATOR + +inline namespace literals { +inline namespace string_view_literals { + + +constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +{ + return std::string_view{ str, len }; +} + +constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +{ + return std::u16string_view{ str, len }; +} + +constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +{ + return std::u32string_view{ str, len }; +} + +constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +{ + return std::wstring_view{ str, len }; +} + +}} // namespace literals::string_view_literals + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +} // namespace nonstd + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { + +using std::string_view; +using std::wstring_view; +using std::u16string_view; +using std::u32string_view; +using std::basic_string_view; + +// literal "sv" and "_sv", see above + +using std::operator==; +using std::operator!=; +using std::operator<; +using std::operator<=; +using std::operator>; +using std::operator>=; + +using std::operator<<; + +} // namespace nonstd + +#else // nssv_HAVE_STD_STRING_VIEW + +// +// Before C++17: use string_view lite: +// + +// Compiler versions: +// +// MSVC++ 6.0 _MSC_VER == 1200 nssv_COMPILER_MSVC_VERSION == 60 (Visual Studio 6.0) +// MSVC++ 7.0 _MSC_VER == 1300 nssv_COMPILER_MSVC_VERSION == 70 (Visual Studio .NET 2002) +// MSVC++ 7.1 _MSC_VER == 1310 nssv_COMPILER_MSVC_VERSION == 71 (Visual Studio .NET 2003) +// MSVC++ 8.0 
_MSC_VER == 1400 nssv_COMPILER_MSVC_VERSION == 80 (Visual Studio 2005) +// MSVC++ 9.0 _MSC_VER == 1500 nssv_COMPILER_MSVC_VERSION == 90 (Visual Studio 2008) +// MSVC++ 10.0 _MSC_VER == 1600 nssv_COMPILER_MSVC_VERSION == 100 (Visual Studio 2010) +// MSVC++ 11.0 _MSC_VER == 1700 nssv_COMPILER_MSVC_VERSION == 110 (Visual Studio 2012) +// MSVC++ 12.0 _MSC_VER == 1800 nssv_COMPILER_MSVC_VERSION == 120 (Visual Studio 2013) +// MSVC++ 14.0 _MSC_VER == 1900 nssv_COMPILER_MSVC_VERSION == 140 (Visual Studio 2015) +// MSVC++ 14.1 _MSC_VER >= 1910 nssv_COMPILER_MSVC_VERSION == 141 (Visual Studio 2017) +// MSVC++ 14.2 _MSC_VER >= 1920 nssv_COMPILER_MSVC_VERSION == 142 (Visual Studio 2019) + +#if defined(_MSC_VER ) && !defined(__clang__) +# define nssv_COMPILER_MSVC_VER (_MSC_VER ) +# define nssv_COMPILER_MSVC_VERSION (_MSC_VER / 10 - 10 * ( 5 + (_MSC_VER < 1900 ) ) ) +#else +# define nssv_COMPILER_MSVC_VER 0 +# define nssv_COMPILER_MSVC_VERSION 0 +#endif + +#define nssv_COMPILER_VERSION( major, minor, patch ) ( 10 * ( 10 * (major) + (minor) ) + (patch) ) + +#if defined( __apple_build_version__ ) +# define nssv_COMPILER_APPLECLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +# define nssv_COMPILER_CLANG_VERSION 0 +#elif defined( __clang__ ) +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION nssv_COMPILER_VERSION(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define nssv_COMPILER_APPLECLANG_VERSION 0 +# define nssv_COMPILER_CLANG_VERSION 0 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +# define nssv_COMPILER_GNUC_VERSION nssv_COMPILER_VERSION(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define nssv_COMPILER_GNUC_VERSION 0 +#endif + +// half-open range [lo..hi): +#define nssv_BETWEEN( v, lo, hi ) ( (lo) <= (v) && (v) < (hi) ) + +// Presence of language and library features: + +#ifdef _HAS_CPP0X +# define nssv_HAS_CPP0X _HAS_CPP0X +#else +# define nssv_HAS_CPP0X 0 +#endif + +// Unless defined otherwise below, consider VC14 as C++11 for variant-lite: + +#if nssv_COMPILER_MSVC_VER >= 1900 +# undef nssv_CPP11_OR_GREATER +# define nssv_CPP11_OR_GREATER 1 +#endif + +#define nssv_CPP11_90 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1500) +#define nssv_CPP11_100 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1600) +#define nssv_CPP11_110 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1700) +#define nssv_CPP11_120 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1800) +#define nssv_CPP11_140 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1900) +#define nssv_CPP11_141 (nssv_CPP11_OR_GREATER_ || nssv_COMPILER_MSVC_VER >= 1910) + +#define nssv_CPP14_000 (nssv_CPP14_OR_GREATER) +#define nssv_CPP17_000 (nssv_CPP17_OR_GREATER) + +// Presence of C++11 language features: + +#define nssv_HAVE_CONSTEXPR_11 nssv_CPP11_140 +#define nssv_HAVE_EXPLICIT_CONVERSION nssv_CPP11_140 +#define nssv_HAVE_INLINE_NAMESPACE nssv_CPP11_140 +#define nssv_HAVE_IS_DEFAULT nssv_CPP11_140 +#define nssv_HAVE_IS_DELETE nssv_CPP11_140 +#define nssv_HAVE_NOEXCEPT nssv_CPP11_140 +#define nssv_HAVE_NULLPTR nssv_CPP11_100 +#define nssv_HAVE_REF_QUALIFIER nssv_CPP11_140 +#define nssv_HAVE_UNICODE_LITERALS nssv_CPP11_140 +#define nssv_HAVE_USER_DEFINED_LITERALS nssv_CPP11_140 +#define nssv_HAVE_WCHAR16_T nssv_CPP11_100 +#define nssv_HAVE_WCHAR32_T nssv_CPP11_100 + +#if ! 
( ( nssv_CPP11_OR_GREATER && nssv_COMPILER_CLANG_VERSION ) || nssv_BETWEEN( nssv_COMPILER_CLANG_VERSION, 300, 400 ) ) +# define nssv_HAVE_STD_DEFINED_LITERALS nssv_CPP11_140 +#else +# define nssv_HAVE_STD_DEFINED_LITERALS 0 +#endif + +// Presence of C++14 language features: + +#define nssv_HAVE_CONSTEXPR_14 nssv_CPP14_000 + +// Presence of C++17 language features: + +#define nssv_HAVE_NODISCARD nssv_CPP17_000 + +// Presence of C++ library features: + +#define nssv_HAVE_STD_HASH nssv_CPP11_120 + +// Presence of compiler intrinsics: + +// Providing char-type specializations for compare() and length() that +// use compiler intrinsics can improve compile- and run-time performance. +// +// The challenge is in using the right combinations of builtin availability +// and its constexpr-ness. +// +// | compiler | __builtin_memcmp (constexpr) | memcmp (constexpr) | +// |----------|------------------------------|---------------------| +// | clang | 4.0 (>= 4.0 ) | any (? ) | +// | clang-a | 9.0 (>= 9.0 ) | any (? ) | +// | gcc | any (constexpr) | any (? ) | +// | msvc | >= 14.2 C++17 (>= 14.2 ) | any (? ) | + +#define nssv_HAVE_BUILTIN_VER ( (nssv_CPP17_000 && nssv_COMPILER_MSVC_VERSION >= 142) || nssv_COMPILER_GNUC_VERSION > 0 || nssv_COMPILER_CLANG_VERSION >= 400 || nssv_COMPILER_APPLECLANG_VERSION >= 900 ) +#define nssv_HAVE_BUILTIN_CE ( nssv_HAVE_BUILTIN_VER ) + +#define nssv_HAVE_BUILTIN_MEMCMP ( (nssv_HAVE_CONSTEXPR_14 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_14 ) +#define nssv_HAVE_BUILTIN_STRLEN ( (nssv_HAVE_CONSTEXPR_11 && nssv_HAVE_BUILTIN_CE) || !nssv_HAVE_CONSTEXPR_11 ) + +#ifdef __has_builtin +# define nssv_HAVE_BUILTIN( x ) __has_builtin( x ) +#else +# define nssv_HAVE_BUILTIN( x ) 0 +#endif + +#if nssv_HAVE_BUILTIN(__builtin_memcmp) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_MEMCMP __builtin_memcmp +#else +# define nssv_BUILTIN_MEMCMP memcmp +#endif + +#if nssv_HAVE_BUILTIN(__builtin_strlen) || nssv_HAVE_BUILTIN_VER +# define nssv_BUILTIN_STRLEN __builtin_strlen +#else +# define nssv_BUILTIN_STRLEN strlen +#endif + +// C++ feature usage: + +#if nssv_HAVE_CONSTEXPR_11 +# define nssv_constexpr constexpr +#else +# define nssv_constexpr /*constexpr*/ +#endif + +#if nssv_HAVE_CONSTEXPR_14 +# define nssv_constexpr14 constexpr +#else +# define nssv_constexpr14 /*constexpr*/ +#endif + +#if nssv_HAVE_EXPLICIT_CONVERSION +# define nssv_explicit explicit +#else +# define nssv_explicit /*explicit*/ +#endif + +#if nssv_HAVE_INLINE_NAMESPACE +# define nssv_inline_ns inline +#else +# define nssv_inline_ns /*inline*/ +#endif + +#if nssv_HAVE_NOEXCEPT +# define nssv_noexcept noexcept +#else +# define nssv_noexcept /*noexcept*/ +#endif + +//#if nssv_HAVE_REF_QUALIFIER +//# define nssv_ref_qual & +//# define nssv_refref_qual && +//#else +//# define nssv_ref_qual /*&*/ +//# define nssv_refref_qual /*&&*/ +//#endif + +#if nssv_HAVE_NULLPTR +# define nssv_nullptr nullptr +#else +# define nssv_nullptr NULL +#endif + +#if nssv_HAVE_NODISCARD +# define nssv_nodiscard [[nodiscard]] +#else +# define nssv_nodiscard /*[[nodiscard]]*/ +#endif + +// Additional includes: + +#include +#include +#include +#include +#include // std::char_traits<> + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +# include +#endif + +#if ! 
nssv_CONFIG_NO_EXCEPTIONS +# include +#endif + +#if nssv_CPP11_OR_GREATER +# include +#endif + +// Clang, GNUC, MSVC warning suppression macros: + +#if defined(__clang__) +# pragma clang diagnostic ignored "-Wreserved-user-defined-literal" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wuser-defined-literals" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wliteral-suffix" +#endif // __clang__ + +#if nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_SUPPRESS_MSGSL_WARNING(expr) [[gsl::suppress(expr)]] +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) __pragma(warning(suppress: code) ) +# define nssv_DISABLE_MSVC_WARNINGS(codes) __pragma(warning(push)) __pragma(warning(disable: codes)) +#else +# define nssv_SUPPRESS_MSGSL_WARNING(expr) +# define nssv_SUPPRESS_MSVC_WARNING(code, descr) +# define nssv_DISABLE_MSVC_WARNINGS(codes) +#endif + +#if defined(__clang__) +# define nssv_RESTORE_WARNINGS() _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) +# define nssv_RESTORE_WARNINGS() _Pragma("GCC diagnostic pop") +#elif nssv_COMPILER_MSVC_VERSION >= 140 +# define nssv_RESTORE_WARNINGS() __pragma(warning(pop )) +#else +# define nssv_RESTORE_WARNINGS() +#endif + +// Suppress the following MSVC (GSL) warnings: +// - C4455, non-gsl : 'operator ""sv': literal suffix identifiers that do not +// start with an underscore are reserved +// - C26472, gsl::t.1 : don't use a static_cast for arithmetic conversions; +// use brace initialization, gsl::narrow_cast or gsl::narow +// - C26481: gsl::b.1 : don't use pointer arithmetic. Use span instead + +nssv_DISABLE_MSVC_WARNINGS( 4455 26481 26472 ) +//nssv_DISABLE_CLANG_WARNINGS( "-Wuser-defined-literals" ) +//nssv_DISABLE_GNUC_WARNINGS( -Wliteral-suffix ) + +namespace nonstd { namespace sv_lite { + +// +// basic_string_view declaration: +// + +template +< + class CharT, + class Traits = std::char_traits +> +class basic_string_view; + +namespace detail { + +// support constexpr comparison in C++14; +// for C++17 and later, use provided traits: + +template< typename CharT > +inline nssv_constexpr14 int compare( CharT const * s1, CharT const * s2, std::size_t count ) +{ + while ( count-- != 0 ) + { + if ( *s1 < *s2 ) return -1; + if ( *s1 > *s2 ) return +1; + ++s1; ++s2; + } + return 0; +} + +#if nssv_HAVE_BUILTIN_MEMCMP + +// specialization of compare() for char, see also generic compare() above: + +inline nssv_constexpr14 int compare( char const * s1, char const * s2, std::size_t count ) +{ + return nssv_BUILTIN_MEMCMP( s1, s2, count ); +} + +#endif + +#if nssv_HAVE_BUILTIN_STRLEN + +// specialization of length() for char, see also generic length() further below: + +inline nssv_constexpr std::size_t length( char const * s ) +{ + return nssv_BUILTIN_STRLEN( s ); +} + +#endif + +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make length() non-recursive: + +template< typename CharT > +inline nssv_constexpr std::size_t length( CharT * s, std::size_t result = 0 ) +{ + return *s == '\0' ? result : length( s + 1, result + 1 ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< typename CharT > +inline nssv_constexpr14 std::size_t length( CharT * s ) +{ + std::size_t result = 0; + while ( *s++ != '\0' ) + { + ++result; + } + return result; +} + +#endif // OPTIMIZE + +#if nssv_CPP11_OR_GREATER && ! 
nssv_CPP17_OR_GREATER +#if defined(__OPTIMIZE__) + +// gcc, clang provide __OPTIMIZE__ +// Expect tail call optimization to make search() non-recursive: + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return haystack.starts_with( needle ) ? haystack.begin() : + haystack.empty() ? haystack.end() : search( haystack.substr(1), needle ); +} + +#else // OPTIMIZE + +// non-recursive: + +template< class CharT, class Traits = std::char_traits > +constexpr const CharT* search( basic_string_view haystack, basic_string_view needle ) +{ + return std::search( haystack.begin(), haystack.end(), needle.begin(), needle.end() ); +} + +#endif // OPTIMIZE +#endif // nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + +} // namespace detail + +// +// basic_string_view: +// + +template +< + class CharT, + class Traits /* = std::char_traits */ +> +class basic_string_view +{ +public: + // Member types: + + typedef Traits traits_type; + typedef CharT value_type; + + typedef CharT * pointer; + typedef CharT const * const_pointer; + typedef CharT & reference; + typedef CharT const & const_reference; + + typedef const_pointer iterator; + typedef const_pointer const_iterator; + typedef std::reverse_iterator< const_iterator > reverse_iterator; + typedef std::reverse_iterator< const_iterator > const_reverse_iterator; + + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // 24.4.2.1 Construction and assignment: + + nssv_constexpr basic_string_view() nssv_noexcept + : data_( nssv_nullptr ) + , size_( 0 ) + {} + +#if nssv_CPP11_OR_GREATER + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr basic_string_view( basic_string_view const & other ) nssv_noexcept + : data_( other.data_) + , size_( other.size_) + {} +#endif + + nssv_constexpr basic_string_view( CharT const * s, size_type count ) nssv_noexcept // non-standard noexcept + : data_( s ) + , size_( count ) + {} + + nssv_constexpr basic_string_view( CharT const * s) nssv_noexcept // non-standard noexcept + : data_( s ) +#if nssv_CPP17_OR_GREATER + , size_( Traits::length(s) ) +#elif nssv_CPP11_OR_GREATER + , size_( detail::length(s) ) +#else + , size_( Traits::length(s) ) +#endif + {} + +#if nssv_HAVE_NULLPTR +# if nssv_HAVE_IS_DELETE + nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept = delete; +# else + private: nssv_constexpr basic_string_view( std::nullptr_t ) nssv_noexcept; public: +# endif +#endif + + // Assignment: + +#if nssv_CPP11_OR_GREATER + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept = default; +#else + nssv_constexpr14 basic_string_view & operator=( basic_string_view const & other ) nssv_noexcept + { + data_ = other.data_; + size_ = other.size_; + return *this; + } +#endif + + // 24.4.2.2 Iterator support: + + nssv_constexpr const_iterator begin() const nssv_noexcept { return data_; } + nssv_constexpr const_iterator end() const nssv_noexcept { return data_ + size_; } + + nssv_constexpr const_iterator cbegin() const nssv_noexcept { return begin(); } + nssv_constexpr const_iterator cend() const nssv_noexcept { return end(); } + + nssv_constexpr const_reverse_iterator rbegin() const nssv_noexcept { return const_reverse_iterator( end() ); } + nssv_constexpr const_reverse_iterator rend() const nssv_noexcept { return const_reverse_iterator( begin() ); } + + nssv_constexpr const_reverse_iterator crbegin() 
const nssv_noexcept { return rbegin(); } + nssv_constexpr const_reverse_iterator crend() const nssv_noexcept { return rend(); } + + // 24.4.2.3 Capacity: + + nssv_constexpr size_type size() const nssv_noexcept { return size_; } + nssv_constexpr size_type length() const nssv_noexcept { return size_; } + nssv_constexpr size_type max_size() const nssv_noexcept { return (std::numeric_limits< size_type >::max)(); } + + // since C++20 + nssv_nodiscard nssv_constexpr bool empty() const nssv_noexcept + { + return 0 == size_; + } + + // 24.4.2.4 Element access: + + nssv_constexpr const_reference operator[]( size_type pos ) const + { + return data_at( pos ); + } + + nssv_constexpr14 const_reference at( size_type pos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos < size() ); +#else + if ( pos >= size() ) + { + throw std::out_of_range("nonstd::string_view::at()"); + } +#endif + return data_at( pos ); + } + + nssv_constexpr const_reference front() const { return data_at( 0 ); } + nssv_constexpr const_reference back() const { return data_at( size() - 1 ); } + + nssv_constexpr const_pointer data() const nssv_noexcept { return data_; } + + // 24.4.2.5 Modifiers: + + nssv_constexpr14 void remove_prefix( size_type n ) + { + assert( n <= size() ); + data_ += n; + size_ -= n; + } + + nssv_constexpr14 void remove_suffix( size_type n ) + { + assert( n <= size() ); + size_ -= n; + } + + nssv_constexpr14 void swap( basic_string_view & other ) nssv_noexcept + { + const basic_string_view tmp(other); + other = *this; + *this = tmp; + } + + // 24.4.2.6 String operations: + + size_type copy( CharT * dest, size_type n, size_type pos = 0 ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::copy()"); + } +#endif + const size_type rlen = (std::min)( n, size() - pos ); + + (void) Traits::copy( dest, data() + pos, rlen ); + + return rlen; + } + + nssv_constexpr14 basic_string_view substr( size_type pos = 0, size_type n = npos ) const + { +#if nssv_CONFIG_NO_EXCEPTIONS + assert( pos <= size() ); +#else + if ( pos > size() ) + { + throw std::out_of_range("nonstd::string_view::substr()"); + } +#endif + return basic_string_view( data() + pos, (std::min)( n, size() - pos ) ); + } + + // compare(), 6x: + + nssv_constexpr14 int compare( basic_string_view other ) const nssv_noexcept // (1) + { +#if nssv_CPP17_OR_GREATER + if ( const int result = Traits::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#else + if ( const int result = detail::compare( data(), other.data(), (std::min)( size(), other.size() ) ) ) +#endif + { + return result; + } + + return size() == other.size() ? 0 : size() < other.size() ? 
-1 : 1; + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other ) const // (2) + { + return substr( pos1, n1 ).compare( other ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, basic_string_view other, size_type pos2, size_type n2 ) const // (3) + { + return substr( pos1, n1 ).compare( other.substr( pos2, n2 ) ); + } + + nssv_constexpr int compare( CharT const * s ) const // (4) + { + return compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s ) const // (5) + { + return substr( pos1, n1 ).compare( basic_string_view( s ) ); + } + + nssv_constexpr int compare( size_type pos1, size_type n1, CharT const * s, size_type n2 ) const // (6) + { + return substr( pos1, n1 ).compare( basic_string_view( s, n2 ) ); + } + + // 24.4.2.7 Searching: + + // starts_with(), 3x, since C++20: + + nssv_constexpr bool starts_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( 0, v.size(), v ) == 0; + } + + nssv_constexpr bool starts_with( CharT c ) const nssv_noexcept // (2) + { + return starts_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool starts_with( CharT const * s ) const // (3) + { + return starts_with( basic_string_view( s ) ); + } + + // ends_with(), 3x, since C++20: + + nssv_constexpr bool ends_with( basic_string_view v ) const nssv_noexcept // (1) + { + return size() >= v.size() && compare( size() - v.size(), npos, v ) == 0; + } + + nssv_constexpr bool ends_with( CharT c ) const nssv_noexcept // (2) + { + return ends_with( basic_string_view( &c, 1 ) ); + } + + nssv_constexpr bool ends_with( CharT const * s ) const // (3) + { + return ends_with( basic_string_view( s ) ); + } + + // find(), 4x: + + nssv_constexpr size_type find( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return assert( v.size() == 0 || v.data() != nssv_nullptr ) + , pos >= size() + ? npos : to_pos( +#if nssv_CPP11_OR_GREATER && ! nssv_CPP17_OR_GREATER + detail::search( substr(pos), v ) +#else + std::search( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) +#endif + ); + } + + nssv_constexpr size_type find( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find( CharT const * s, size_type pos = 0 ) const // (4) + { + return find( basic_string_view( s ), pos ); + } + + // rfind(), 4x: + + nssv_constexpr14 size_type rfind( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + if ( size() < v.size() ) + { + return npos; + } + + if ( v.empty() ) + { + return (std::min)( size(), pos ); + } + + const_iterator last = cbegin() + (std::min)( size() - v.size(), pos ) + v.size(); + const_iterator result = std::find_end( cbegin(), last, v.cbegin(), v.cend(), Traits::eq ); + + return result != last ? 
size_type( result - cbegin() ) : npos; + } + + nssv_constexpr14 size_type rfind( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return rfind( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos, size_type n ) const // (3) + { + return rfind( basic_string_view( s, n ), pos ); + } + + nssv_constexpr14 size_type rfind( CharT const * s, size_type pos = npos ) const // (4) + { + return rfind( basic_string_view( s ), pos ); + } + + // find_first_of(), 4x: + + nssv_constexpr size_type find_first_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? npos + : to_pos( std::find_first_of( cbegin() + pos, cend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_first_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos, size_type n ) const // (3) + { + return find_first_of( basic_string_view( s, n ), pos ); + } + + nssv_constexpr size_type find_first_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_of( basic_string_view( s ), pos ); + } + + // find_last_of(), 4x: + + nssv_constexpr size_type find_last_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? find_last_of( v, size() - 1 ) + : to_pos( std::find_first_of( const_reverse_iterator( cbegin() + pos + 1 ), crend(), v.cbegin(), v.cend(), Traits::eq ) ); + } + + nssv_constexpr size_type find_last_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_of( basic_string_view( s ), pos ); + } + + // find_first_not_of(), 4x: + + nssv_constexpr size_type find_first_not_of( basic_string_view v, size_type pos = 0 ) const nssv_noexcept // (1) + { + return pos >= size() + ? npos + : to_pos( std::find_if( cbegin() + pos, cend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_first_not_of( CharT c, size_type pos = 0 ) const nssv_noexcept // (2) + { + return find_first_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_first_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_first_not_of( CharT const * s, size_type pos = 0 ) const // (4) + { + return find_first_not_of( basic_string_view( s ), pos ); + } + + // find_last_not_of(), 4x: + + nssv_constexpr size_type find_last_not_of( basic_string_view v, size_type pos = npos ) const nssv_noexcept // (1) + { + return empty() + ? npos + : pos >= size() + ? 
find_last_not_of( v, size() - 1 ) + : to_pos( std::find_if( const_reverse_iterator( cbegin() + pos + 1 ), crend(), not_in_view( v ) ) ); + } + + nssv_constexpr size_type find_last_not_of( CharT c, size_type pos = npos ) const nssv_noexcept // (2) + { + return find_last_not_of( basic_string_view( &c, 1 ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos, size_type count ) const // (3) + { + return find_last_not_of( basic_string_view( s, count ), pos ); + } + + nssv_constexpr size_type find_last_not_of( CharT const * s, size_type pos = npos ) const // (4) + { + return find_last_not_of( basic_string_view( s ), pos ); + } + + // Constants: + +#if nssv_CPP17_OR_GREATER + static nssv_constexpr size_type npos = size_type(-1); +#elif nssv_CPP11_OR_GREATER + enum : size_type { npos = size_type(-1) }; +#else + enum { npos = size_type(-1) }; +#endif + +private: + struct not_in_view + { + const basic_string_view v; + + nssv_constexpr explicit not_in_view( basic_string_view v_ ) : v( v_ ) {} + + nssv_constexpr bool operator()( CharT c ) const + { + return npos == v.find_first_of( c ); + } + }; + + nssv_constexpr size_type to_pos( const_iterator it ) const + { + return it == cend() ? npos : size_type( it - cbegin() ); + } + + nssv_constexpr size_type to_pos( const_reverse_iterator it ) const + { + return it == crend() ? npos : size_type( crend() - it - 1 ); + } + + nssv_constexpr const_reference data_at( size_type pos ) const + { +#if nssv_BETWEEN( nssv_COMPILER_GNUC_VERSION, 1, 500 ) + return data_[pos]; +#else + return assert( pos < size() ), data_[pos]; +#endif + } + +private: + const_pointer data_; + size_type size_; + +public: +#if nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS + + template< class Allocator > + basic_string_view( std::basic_string const & s ) nssv_noexcept + : data_( s.data() ) + , size_( s.size() ) + {} + +#if nssv_HAVE_EXPLICIT_CONVERSION + + template< class Allocator > + explicit operator std::basic_string() const + { + return to_string( Allocator() ); + } + +#endif // nssv_HAVE_EXPLICIT_CONVERSION + +#if nssv_CPP11_OR_GREATER + + template< class Allocator = std::allocator > + std::basic_string + to_string( Allocator const & a = Allocator() ) const + { + return std::basic_string( begin(), end(), a ); + } + +#else + + std::basic_string + to_string() const + { + return std::basic_string( begin(), end() ); + } + + template< class Allocator > + std::basic_string + to_string( Allocator const & a ) const + { + return std::basic_string( begin(), end(), a ); + } + +#endif // nssv_CPP11_OR_GREATER + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_CLASS_METHODS +}; + +// +// Non-member functions: +// + +// 24.4.3 Non-member comparison functions: +// lexicographically compare two string views (function template): + +template< class CharT, class Traits > +nssv_constexpr bool operator== ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator!= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits > +nssv_constexpr bool operator< ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator<= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, 
class Traits > +nssv_constexpr bool operator> ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits > +nssv_constexpr bool operator>= ( + basic_string_view lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +// Let S be basic_string_view, and sv be an instance of S. +// Implementations shall provide sufficient additional overloads marked +// constexpr and noexcept so that an object t with an implicit conversion +// to S can be compared according to Table 67. + +#if ! nssv_CPP11_OR_GREATER || nssv_BETWEEN( nssv_COMPILER_MSVC_VERSION, 100, 141 ) + +// accommodate for older compilers: + +// == + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.size() == detail::length( rhs ) && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return detail::length( lhs ) == rhs.size() && rhs.compare( lhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator==( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits> +nssv_constexpr bool operator!=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) > 0; } + +// <= + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator<=( + std::basic_string rhs, + basic_string_view lhs 
) nssv_noexcept +{ return rhs.compare( lhs ) >= 0; } + +// > + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) < 0; } + +// >= + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + CharT const * rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + CharT const * lhs, + basic_string_view rhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + basic_string_view lhs, + std::basic_string rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits> +nssv_constexpr bool operator>=( + std::basic_string rhs, + basic_string_view lhs ) nssv_noexcept +{ return rhs.compare( lhs ) <= 0; } + +#else // newer compilers: + +#define nssv_BASIC_STRING_VIEW_I(T,U) typename std::decay< basic_string_view >::type + +#if defined(_MSC_VER) // issue 40 +# define nssv_MSVC_ORDER(x) , int=x +#else +# define nssv_MSVC_ORDER(x) /*, int=x*/ +#endif + +// == + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator==( + basic_string_view lhs, + nssv_BASIC_STRING_VIEW_I(CharT, Traits) rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator==( + nssv_BASIC_STRING_VIEW_I(CharT, Traits) lhs, + basic_string_view rhs ) nssv_noexcept +{ return lhs.size() == rhs.size() && lhs.compare( rhs ) == 0; } + +// != + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator!= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator!= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return !( lhs == rhs ); } + +// < + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator< ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator< ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) < 0; } + +// <= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator<= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator<= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < 
CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) <= 0; } + +// > + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator> ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator> ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) > 0; } + +// >= + +template< class CharT, class Traits nssv_MSVC_ORDER(1) > +nssv_constexpr bool operator>= ( + basic_string_view < CharT, Traits > lhs, + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +template< class CharT, class Traits nssv_MSVC_ORDER(2) > +nssv_constexpr bool operator>= ( + nssv_BASIC_STRING_VIEW_I( CharT, Traits ) lhs, + basic_string_view < CharT, Traits > rhs ) nssv_noexcept +{ return lhs.compare( rhs ) >= 0; } + +#undef nssv_MSVC_ORDER +#undef nssv_BASIC_STRING_VIEW_I + +#endif // compiler-dependent approach to comparisons + +// 24.4.4 Inserters and extractors: + +#if ! nssv_CONFIG_NO_STREAM_INSERTION + +namespace detail { + +template< class Stream > +void write_padding( Stream & os, std::streamsize n ) +{ + for ( std::streamsize i = 0; i < n; ++i ) + os.rdbuf()->sputc( os.fill() ); +} + +template< class Stream, class View > +Stream & write_to_stream( Stream & os, View const & sv ) +{ + typename Stream::sentry sentry( os ); + + if ( !sentry ) + return os; + + const std::streamsize length = static_cast( sv.length() ); + + // Whether, and how, to pad: + const bool pad = ( length < os.width() ); + const bool left_pad = pad && ( os.flags() & std::ios_base::adjustfield ) == std::ios_base::right; + + if ( left_pad ) + write_padding( os, os.width() - length ); + + // Write span characters: + os.rdbuf()->sputn( sv.begin(), length ); + + if ( pad && !left_pad ) + write_padding( os, os.width() - length ); + + // Reset output stream width: + os.width( 0 ); + + return os; +} + +} // namespace detail + +template< class CharT, class Traits > +std::basic_ostream & +operator<<( + std::basic_ostream& os, + basic_string_view sv ) +{ + return detail::write_to_stream( os, sv ); +} + +#endif // nssv_CONFIG_NO_STREAM_INSERTION + +// Several typedefs for common character types are provided: + +typedef basic_string_view string_view; +typedef basic_string_view wstring_view; +#if nssv_HAVE_WCHAR16_T +typedef basic_string_view u16string_view; +typedef basic_string_view u32string_view; +#endif + +}} // namespace nonstd::sv_lite + +// +// 24.4.6 Suffix for basic_string_view literals: +// + +#if nssv_HAVE_USER_DEFINED_LITERALS + +namespace nonstd { +nssv_inline_ns namespace literals { +nssv_inline_ns namespace string_view_literals { + +#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, 
size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS + +#if nssv_CONFIG_USR_SV_OPERATOR + +nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +{ + return nonstd::sv_lite::string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +{ + return nonstd::sv_lite::u16string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +{ + return nonstd::sv_lite::u32string_view{ str, len }; +} + +nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +{ + return nonstd::sv_lite::wstring_view{ str, len }; +} + +#endif // nssv_CONFIG_USR_SV_OPERATOR + +}}} // namespace nonstd::literals::string_view_literals + +#endif + +// +// Extensions for std::string: +// + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +namespace nonstd { +namespace sv_lite { + +// Exclude MSVC 14 (19.00): it yields ambiguous to_string(): + +#if nssv_CPP11_OR_GREATER && nssv_COMPILER_MSVC_VERSION != 140 + +template< class CharT, class Traits, class Allocator = std::allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a = Allocator() ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#else + +template< class CharT, class Traits > +std::basic_string +to_string( basic_string_view v ) +{ + return std::basic_string( v.begin(), v.end() ); +} + +template< class CharT, class Traits, class Allocator > +std::basic_string +to_string( basic_string_view v, Allocator const & a ) +{ + return std::basic_string( v.begin(), v.end(), a ); +} + +#endif // nssv_CPP11_OR_GREATER + +template< class CharT, class Traits, class Allocator > +basic_string_view +to_string_view( std::basic_string const & s ) +{ + return basic_string_view( s.data(), s.size() ); +} + +}} // namespace nonstd::sv_lite + +#endif // nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS + +// +// make types and algorithms available in namespace nonstd: +// + +namespace nonstd { + +using sv_lite::basic_string_view; +using sv_lite::string_view; +using sv_lite::wstring_view; + +#if nssv_HAVE_WCHAR16_T +using sv_lite::u16string_view; +#endif +#if nssv_HAVE_WCHAR32_T +using sv_lite::u32string_view; +#endif + +// literal "sv" + +using sv_lite::operator==; +using sv_lite::operator!=; +using sv_lite::operator<; +using sv_lite::operator<=; +using sv_lite::operator>; +using sv_lite::operator>=; + +#if ! nssv_CONFIG_NO_STREAM_INSERTION +using sv_lite::operator<<; +#endif + +#if nssv_CONFIG_CONVERSION_STD_STRING_FREE_FUNCTIONS +using sv_lite::to_string; +using sv_lite::to_string_view; +#endif + +} // namespace nonstd + +// 24.4.5 Hash support (C++11): + +// Note: The hash value of a string view object is equal to the hash value of +// the corresponding string object. 
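+
+// Editor's note (illustrative sketch, not part of string-view-lite): because the
+// hash of a string view equals the hash of the corresponding string, views can be
+// used as keys in hashed containers interchangeably with strings, e.g. (relying on
+// the std::hash specializations defined just below):
+//
+//   std::unordered_map< nonstd::string_view, int > counts;
+//   counts[ nonstd::string_view( "key" ) ] = 1;   // hashed the same as std::string("key")
+//   assert( std::hash< nonstd::string_view >()( nonstd::string_view( "key" ) )
+//        == std::hash< std::string >()( std::string( "key" ) ) );
+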
+ +#if nssv_HAVE_STD_HASH + +#include + +namespace std { + +template<> +struct hash< nonstd::string_view > +{ +public: + std::size_t operator()( nonstd::string_view v ) const nssv_noexcept + { + return std::hash()( std::string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::wstring_view > +{ +public: + std::size_t operator()( nonstd::wstring_view v ) const nssv_noexcept + { + return std::hash()( std::wstring( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u16string_view > +{ +public: + std::size_t operator()( nonstd::u16string_view v ) const nssv_noexcept + { + return std::hash()( std::u16string( v.data(), v.size() ) ); + } +}; + +template<> +struct hash< nonstd::u32string_view > +{ +public: + std::size_t operator()( nonstd::u32string_view v ) const nssv_noexcept + { + return std::hash()( std::u32string( v.data(), v.size() ) ); + } +}; + +} // namespace std + +#endif // nssv_HAVE_STD_HASH + +nssv_RESTORE_WARNINGS() + +#endif // nssv_HAVE_STD_STRING_VIEW +#endif // NONSTD_SV_LITE_H_INCLUDED +/* end file simdjson/nonstd/string_view.hpp */ +SIMDJSON_POP_DISABLE_WARNINGS + +namespace std { + using string_view = nonstd::string_view; +} +#endif // SIMDJSON_HAS_STRING_VIEW +#undef SIMDJSON_HAS_STRING_VIEW // We are not going to need this macro anymore. + +/// If EXPR is an error, returns it. +#define SIMDJSON_TRY(EXPR) { auto _err = (EXPR); if (_err) { return _err; } } + +// Unless the programmer has already set SIMDJSON_DEVELOPMENT_CHECKS, +// we want to set it under debug builds. We detect a debug build +// under Visual Studio when the _DEBUG macro is set. Under the other +// compilers, we use the fact that they define __OPTIMIZE__ whenever +// they allow optimizations. +// It is possible that this could miss some cases where SIMDJSON_DEVELOPMENT_CHECKS +// is helpful, but the programmer can set the macro SIMDJSON_DEVELOPMENT_CHECKS. +// It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer +// sets _DEBUG in a release build under Visual Studio, or if some compiler fails to +// set the __OPTIMIZE__ macro). +#ifndef SIMDJSON_DEVELOPMENT_CHECKS +#ifdef _MSC_VER +// Visual Studio seems to set _DEBUG for debug builds. +#ifdef _DEBUG +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // _DEBUG +#else // _MSC_VER +// All other compilers appear to set __OPTIMIZE__ to a positive integer +// when the compiler is optimizing. +#ifndef __OPTIMIZE__ +#define SIMDJSON_DEVELOPMENT_CHECKS 1 +#endif // __OPTIMIZE__ +#endif // _MSC_VER +#endif // SIMDJSON_DEVELOPMENT_CHECKS + +// The SIMDJSON_CHECK_EOF macro is a feature flag for the "don't require padding" +// feature. 
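+
+// Editor's note (illustrative sketch, not from the simdjson sources): the
+// SIMDJSON_TRY macro defined above is intended for early error propagation in
+// functions that themselves return an error_code, roughly:
+//
+//   simdjson::error_code load_root(simdjson::dom::parser &p,
+//                                  const simdjson::padded_string &json,
+//                                  simdjson::dom::element &root) {
+//     SIMDJSON_TRY( p.parse(json).get(root) );  // returns the error_code on failure
+//     return simdjson::SUCCESS;
+//   }
+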
+ +#if SIMDJSON_CPLUSPLUS17 +// if we have C++, then fallthrough is a default attribute +# define simdjson_fallthrough [[fallthrough]] +// check if we have __attribute__ support +#elif defined(__has_attribute) +// check if we have the __fallthrough__ attribute +#if __has_attribute(__fallthrough__) +// we are good to go: +# define simdjson_fallthrough __attribute__((__fallthrough__)) +#endif // __has_attribute(__fallthrough__) +#endif // SIMDJSON_CPLUSPLUS17 +// on some systems, we simply do not have support for fallthrough, so use a default: +#ifndef simdjson_fallthrough +# define simdjson_fallthrough do {} while (0) /* fallthrough */ +#endif // simdjson_fallthrough + +#if SIMDJSON_DEVELOPMENT_CHECKS +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { assert ((expr)); } while (0) +#else +#define SIMDJSON_DEVELOPMENT_ASSERT(expr) do { } while (0) +#endif + +#ifndef SIMDJSON_UTF8VALIDATION +#define SIMDJSON_UTF8VALIDATION 1 +#endif + +#ifdef __has_include +// How do we detect that a compiler supports vbmi2? +// For sure if the following header is found, we are ok? +#if __has_include() +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +#ifdef _MSC_VER +#if _MSC_VER >= 1920 +// Visual Studio 2019 and up support VBMI2 under x64 even if the header +// avx512vbmi2intrin.h is not found. +#define SIMDJSON_COMPILER_SUPPORTS_VBMI2 1 +#endif +#endif + +// By default, we allow AVX512. +#ifndef SIMDJSON_AVX512_ALLOWED +#define SIMDJSON_AVX512_ALLOWED 1 +#endif + +#endif // SIMDJSON_COMMON_DEFS_H +/* end file simdjson/common_defs.h */ + +// This provides the public API for simdjson. +// DOM and ondemand are amalgamated separately, in simdjson.h +/* including simdjson/simdjson_version.h: #include "simdjson/simdjson_version.h" */ +/* begin file simdjson/simdjson_version.h */ +// /include/simdjson/simdjson_version.h automatically generated by release.py, +// do not change by hand +#ifndef SIMDJSON_SIMDJSON_VERSION_H +#define SIMDJSON_SIMDJSON_VERSION_H + +/** The version of simdjson being used (major.minor.revision) */ +#define SIMDJSON_VERSION "3.2.3" + +namespace simdjson { +enum { + /** + * The major version (MAJOR.minor.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MAJOR = 3, + /** + * The minor version (major.MINOR.revision) of simdjson being used. + */ + SIMDJSON_VERSION_MINOR = 2, + /** + * The revision (major.minor.REVISION) of simdjson being used. + */ + SIMDJSON_VERSION_REVISION = 3 +}; +} // namespace simdjson + +#endif // SIMDJSON_SIMDJSON_VERSION_H +/* end file simdjson/simdjson_version.h */ + +/* including simdjson/base.h: #include "simdjson/base.h" */ +/* begin file simdjson/base.h */ +/** + * @file Base declarations for all simdjson headers + * @private + */ +#ifndef SIMDJSON_BASE_H +#define SIMDJSON_BASE_H + +/* skipped duplicate #include "simdjson/common_defs.h" */ +/* skipped duplicate #include "simdjson/compiler_check.h" */ +/* including simdjson/error.h: #include "simdjson/error.h" */ +/* begin file simdjson/error.h */ +#ifndef SIMDJSON_ERROR_H +#define SIMDJSON_ERROR_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include +#include + +namespace simdjson { + +/** + * All possible errors returned by simdjson. These error codes are subject to change + * and not all simdjson kernel returns the same error code given the same input: it is not + * well defined which error a given input should produce. + * + * Only SUCCESS evaluates to false as a Boolean. All other error codes will evaluate + * to true as a Boolean. 
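+ *
+ * Illustrative example (editor's sketch, not part of the original comment):
+ *
+ *   simdjson::dom::parser parser;
+ *   simdjson::dom::element doc;
+ *   auto error = parser.parse(json).get(doc);
+ *   if (error) { std::cerr << error << std::endl; }  // any code other than SUCCESS is truthy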
+ */ +enum error_code { + SUCCESS = 0, ///< No error + CAPACITY, ///< This parser can't support a document that big + MEMALLOC, ///< Error allocating memory, most likely out of memory + TAPE_ERROR, ///< Something went wrong, this is a generic error + DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation + STRING_ERROR, ///< Problem while parsing a string + T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' + F_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'f' + N_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 'n' + NUMBER_ERROR, ///< Problem while parsing a number + UTF8_ERROR, ///< the input is not valid UTF-8 + UNINITIALIZED, ///< unknown error, or uninitialized document + EMPTY, ///< no structural element found + UNESCAPED_CHARS, ///< found unescaped characters in a string. + UNCLOSED_STRING, ///< missing quote at the end + UNSUPPORTED_ARCHITECTURE, ///< unsupported architecture + INCORRECT_TYPE, ///< JSON element has a different type than user expected + NUMBER_OUT_OF_RANGE, ///< JSON number does not fit in 64 bits + INDEX_OUT_OF_BOUNDS, ///< JSON array index too large + NO_SUCH_FIELD, ///< JSON field not found in object + IO_ERROR, ///< Error reading a file + INVALID_JSON_POINTER, ///< Invalid JSON pointer reference + INVALID_URI_FRAGMENT, ///< Invalid URI fragment + UNEXPECTED_ERROR, ///< indicative of a bug in simdjson + PARSER_IN_USE, ///< parser is already in use. + OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order + INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. + OUT_OF_BOUNDS, ///< Attempted to access location outside of document. + TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input + NUM_ERROR_CODES +}; + +/** + * Get the error message for the given error code. + * + * dom::parser parser; + * dom::element doc; + * auto error = parser.parse("foo",3).get(doc); + * if (error) { printf("Error: %s\n", error_message(error)); } + * + * @return The error message. + */ +inline const char *error_message(error_code error) noexcept; + +/** + * Write the error message to the output stream + */ +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept; + +/** + * Exception thrown when an exception-supporting simdjson method is called + */ +struct simdjson_error : public std::exception { + /** + * Create an exception from a simdjson error code. + * @param error The error code + */ + simdjson_error(error_code error) noexcept : _error{error} { } + /** The error message */ + const char *what() const noexcept { return error_message(error()); } + /** The error code */ + error_code error() const noexcept { return _error; } +private: + /** The error code that was used */ + error_code _error; +}; + +namespace internal { + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. 
+ * + * Override like: + * + * struct simdjson_result : public internal::simdjson_result_base { + * simdjson_result() noexcept : internal::simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct simdjson_result_base : protected std::pair { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result_base() noexcept; + + /** + * Create a new error result. + */ + simdjson_inline simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result_base + +} // namespace internal + +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + */ +template +struct simdjson_result : public internal::simdjson_result_base { + /** + * @private Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline simdjson_result() noexcept; + /** + * @private Create a new error result. + */ + simdjson_inline simdjson_result(T &&value) noexcept; + /** + * @private Create a new successful result. 
+ */ + simdjson_inline simdjson_result(error_code error_code) noexcept; + /** + * @private Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline simdjson_result(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; + +}; // struct simdjson_result + +#if SIMDJSON_EXCEPTIONS + +template +inline std::ostream& operator<<(std::ostream& out, simdjson_result value) { return out << value.value(); } +#endif // SIMDJSON_EXCEPTIONS + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +/** + * @deprecated This is an alias and will be removed, use error_code instead + */ +using ErrorValues [[deprecated("This is an alias and will be removed, use error_code instead")]] = error_code; + +/** + * @deprecated Error codes should be stored and returned as `error_code`, use `error_message()` instead. + */ +[[deprecated("Error codes should be stored and returned as `error_code`, use `error_message()` instead.")]] +inline const std::string error_message(int error) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +} // namespace simdjson + +#endif // SIMDJSON_ERROR_H +/* end file simdjson/error.h */ +/* skipped duplicate #include "simdjson/portability.h" */ + +/** + * @brief The top level simdjson namespace, containing everything the library provides. + */ +namespace simdjson { + +SIMDJSON_PUSH_DISABLE_UNUSED_WARNINGS + +/** The maximum document size supported by simdjson. */ +constexpr size_t SIMDJSON_MAXSIZE_BYTES = 0xFFFFFFFF; + +/** + * The amount of padding needed in a buffer to parse JSON. 
+ * + * The input buf should be readable up to buf + SIMDJSON_PADDING + * this is a stopgap; there should be a better description of the + * main loop and its behavior that abstracts over this + * See https://github.com/simdjson/simdjson/issues/174 + */ +constexpr size_t SIMDJSON_PADDING = 64; + +/** + * By default, simdjson supports this many nested objects and arrays. + * + * This is the default for parser::max_depth(). + */ +constexpr size_t DEFAULT_MAX_DEPTH = 1024; + +SIMDJSON_POP_DISABLE_UNUSED_WARNINGS + +class implementation; +struct padded_string; +class padded_string_view; +enum class stage1_mode; + +namespace internal { + +template +class atomic_ptr; +class dom_parser_implementation; +class escape_json_string; +class tape_ref; +struct value128; +enum class tape_type; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_BASE_H +/* end file simdjson/base.h */ + +/* skipped duplicate #include "simdjson/error.h" */ +/* including simdjson/error-inl.h: #include "simdjson/error-inl.h" */ +/* begin file simdjson/error-inl.h */ +#ifndef SIMDJSON_ERROR_INL_H +#define SIMDJSON_ERROR_INL_H + +/* skipped duplicate #include "simdjson/error.h" */ + +#include + +namespace simdjson { +namespace internal { + // We store the error code so we can validate the error message is associated with the right code + struct error_code_info { + error_code code; + const char* message; // do not use a fancy std::string where a simple C string will do (no alloc, no destructor) + }; + // These MUST match the codes in error_code. We check this constraint in basictests. + extern SIMDJSON_DLLIMPORTEXPORT const error_code_info error_codes[]; +} // namespace internal + + +inline const char *error_message(error_code error) noexcept { + // If you're using error_code, we're trusting you got it from the enum. 
+ return internal::error_codes[int(error)].message; +} + +// deprecated function +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +inline const std::string error_message(int error) noexcept { + if (error < 0 || error >= error_code::NUM_ERROR_CODES) { + return internal::error_codes[UNEXPECTED_ERROR].message; + } + return internal::error_codes[error].message; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline std::ostream& operator<<(std::ostream& out, error_code error) noexcept { + return out << error_message(error); +} + +namespace internal { + +// +// internal::simdjson_result_base inline implementation +// + +template +simdjson_inline void simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T&& simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value, error_code error) noexcept + : std::pair(std::forward(value), error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(error_code error) noexcept + : simdjson_result_base(T{}, error) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base(T &&value) noexcept + : simdjson_result_base(std::forward(value), SUCCESS) {} +template +simdjson_inline simdjson_result_base::simdjson_result_base() noexcept + : simdjson_result_base(T{}, UNINITIALIZED) {} + +} // namespace internal + +/// +/// simdjson_result inline implementation +/// + +template +simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noexcept { + std::forward>(*this).tie(value, error); +} + +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { + return std::forward>(*this).get(value); +} + +template +simdjson_inline error_code simdjson_result::error() const noexcept { + return internal::simdjson_result_base::error(); +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result::value() & noexcept(false) { + return internal::simdjson_result_base::value(); +} + +template +simdjson_inline T&& simdjson_result::value() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T&& simdjson_result::take_value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline simdjson_result::operator 
T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& simdjson_result::value_unsafe() const& noexcept { + return internal::simdjson_result_base::value_unsafe(); +} + +template +simdjson_inline T&& simdjson_result::value_unsafe() && noexcept { + return std::forward>(*this).value_unsafe(); +} + +template +simdjson_inline simdjson_result::simdjson_result(T &&value, error_code error) noexcept + : internal::simdjson_result_base(std::forward(value), error) {} +template +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +template +simdjson_inline simdjson_result::simdjson_result(T &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +template +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} + +} // namespace simdjson + +#endif // SIMDJSON_ERROR_INL_H +/* end file simdjson/error-inl.h */ +/* including simdjson/implementation.h: #include "simdjson/implementation.h" */ +/* begin file simdjson/implementation.h */ +#ifndef SIMDJSON_IMPLEMENTATION_H +#define SIMDJSON_IMPLEMENTATION_H + +/* including simdjson/internal/atomic_ptr.h: #include "simdjson/internal/atomic_ptr.h" */ +/* begin file simdjson/internal/atomic_ptr.h */ +#ifndef SIMDJSON_INTERNAL_ATOMIC_PTR_H +#define SIMDJSON_INTERNAL_ATOMIC_PTR_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include + +namespace simdjson { +namespace internal { + +template +class atomic_ptr { +public: + atomic_ptr(T *_ptr) : ptr{_ptr} {} + + operator const T*() const { return ptr.load(); } + const T& operator*() const { return *ptr; } + const T* operator->() const { return ptr.load(); } + + operator T*() { return ptr.load(); } + T& operator*() { return *ptr; } + T* operator->() { return ptr.load(); } + atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; } + +private: + std::atomic ptr; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_ATOMIC_PTR_H +/* end file simdjson/internal/atomic_ptr.h */ +/* including simdjson/internal/dom_parser_implementation.h: #include "simdjson/internal/dom_parser_implementation.h" */ +/* begin file simdjson/internal/dom_parser_implementation.h */ +#ifndef SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +#define SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ +#include + +namespace simdjson { + +namespace dom { +class document; +} // namespace dom + +/** +* This enum is used with the dom_parser_implementation::stage1 function. +* 1) The regular mode expects a fully formed JSON document. +* 2) The streaming_partial mode expects a possibly truncated +* input within a stream on JSON documents. +* 3) The stream_final mode allows us to truncate final +* unterminated strings. It is useful in conjunction with streaming_partial. +*/ +enum class stage1_mode { regular, streaming_partial, streaming_final}; + +/** + * Returns true if mode == streaming_partial or mode == streaming_final + */ +inline bool is_streaming(stage1_mode mode) { + // performance note: it is probably faster to check that mode is different + // from regular than checking that it is either streaming_partial or streaming_final. 
+  return (mode != stage1_mode::regular);
+  // return (mode == stage1_mode::streaming_partial || mode == stage1_mode::streaming_final);
+}
+
+
+namespace internal {
+
+
+/**
+ * An implementation of simdjson's DOM parser for a particular CPU architecture.
+ *
+ * This class is expected to be accessed only by pointer, and never move in memory (though the
+ * pointer can move).
+ */
+class dom_parser_implementation {
+public:
+
+  /**
+   * @private For internal implementation use
+   *
+   * Run a full JSON parse on a single document (stage1 + stage2).
+   *
+   * Guaranteed only to be called when capacity > document length.
+   *
+   * Overridden by each implementation.
+   *
+   * @param buf The json document to parse. *MUST* be allocated up to len + SIMDJSON_PADDING bytes.
+   * @param len The length of the json document.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  simdjson_warn_unused virtual error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept = 0;
+
+  /**
+   * @private For internal implementation use
+   *
+   * Stage 1 of the document parser.
+   *
+   * Guaranteed only to be called when capacity > document length.
+   *
+   * Overridden by each implementation.
+   *
+   * @param buf The json document to parse.
+   * @param len The length of the json document.
+   * @param streaming Whether this is being called by parser::parse_many.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  simdjson_warn_unused virtual error_code stage1(const uint8_t *buf, size_t len, stage1_mode streaming) noexcept = 0;
+
+  /**
+   * @private For internal implementation use
+   *
+   * Stage 2 of the document parser.
+   *
+   * Called after stage1().
+   *
+   * Overridden by each implementation.
+   *
+   * @param doc The document to output to.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  simdjson_warn_unused virtual error_code stage2(dom::document &doc) noexcept = 0;
+
+  /**
+   * @private For internal implementation use
+   *
+   * Stage 2 of the document parser for parser::parse_many.
+   *
+   * Guaranteed only to be called after stage1().
+   * Overridden by each implementation.
+   *
+   * @param doc The document to output to.
+   * @return The error code, SUCCESS if there was no error, or EMPTY if all documents have been parsed.
+   */
+  simdjson_warn_unused virtual error_code stage2_next(dom::document &doc) noexcept = 0;
+
+  /**
+   * Unescape a valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
+   * must be an unescaped quote terminating the string. It returns the final output
+   * position as pointer. In case of error (e.g., the string has bad escaped codes),
+   * then nullptr is returned. It is assumed that the output buffer is large
+   * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
+   * SIMDJSON_PADDING bytes.
+   *
+   * Overridden by each implementation.
+   *
+   * @param src pointer to the beginning of a valid UTF-8 JSON string, must end with an unescaped quote.
+   * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size.
+   * @param allow_replacement whether we allow a replacement character when the UTF-8 contains unmatched surrogate pairs.
+   * @return end of the written region (exclusive) or nullptr in case of error.
+   */
+  simdjson_warn_unused virtual uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept = 0;
+
+  /**
+   * Unescape a NON-valid UTF-8 string from src to dst, stopping at a final unescaped quote. There
+   * must be an unescaped quote terminating the string. It returns the final output
+   * position as pointer. In case of error (e.g., the string has bad escaped codes),
+   * then nullptr is returned. It is assumed that the output buffer is large
+   * enough. E.g., if src points at 'joe"', then dst needs to have four free bytes +
+   * SIMDJSON_PADDING bytes.
+   *
+   * Overridden by each implementation.
+   *
+   * @param src pointer to the beginning of a possibly invalid UTF-8 JSON string, must end with an unescaped quote.
+   * @param dst pointer to a destination buffer, it must point to a region in memory of sufficient size.
+   * @return end of the written region (exclusive) or nullptr in case of error.
+   */
+  simdjson_warn_unused virtual uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept = 0;
+
+  /**
+   * Change the capacity of this parser.
+   *
+   * The capacity can never exceed SIMDJSON_MAXSIZE_BYTES (e.g., 4 GB)
+   * and a CAPACITY error is returned if it is attempted.
+   *
+   * Generally used for reallocation.
+   *
+   * @param capacity The new capacity.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  virtual error_code set_capacity(size_t capacity) noexcept = 0;
+
+  /**
+   * Change the max depth of this parser.
+   *
+   * Generally used for reallocation.
+   *
+   * @param max_depth The new max_depth.
+   * @return The error code, or SUCCESS if there was no error.
+   */
+  virtual error_code set_max_depth(size_t max_depth) noexcept = 0;
+
+  /**
+   * Deallocate this parser.
+   */
+  virtual ~dom_parser_implementation() = default;
+
+  /** Number of structural indices passed from stage 1 to stage 2 */
+  uint32_t n_structural_indexes{0};
+  /** Structural indices passed from stage 1 to stage 2 */
+  std::unique_ptr<uint32_t[]> structural_indexes{};
+  /** Next structural index to parse */
+  uint32_t next_structural_index{0};
+
+  /**
+   * The largest document this parser can support without reallocating.
+   *
+   * @return Current capacity, in bytes.
+   */
+  simdjson_inline size_t capacity() const noexcept;
+
+  /**
+   * The maximum level of nested objects and arrays supported by this parser.
+   *
+   * @return Maximum depth, in levels.
+   */
+  simdjson_inline size_t max_depth() const noexcept;
+
+  /**
+   * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
+   * and `max_depth` depth.
+   *
+   * @param capacity The new capacity.
+   * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
+   * @return The error, if there is one.
+   */
+  simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth) noexcept;
+
+
+protected:
+  /**
+   * The maximum document length this parser supports.
+   *
+   * Buffers are large enough to handle any document up to this length.
+   */
+  size_t _capacity{0};
+
+  /**
+   * The maximum depth (number of nested objects and arrays) supported by this parser.
+   *
+   * Defaults to DEFAULT_MAX_DEPTH.
+   */
+  size_t _max_depth{0};
+
+  // Declaring these so that subclasses can use them to implement their constructors.
+ simdjson_inline dom_parser_implementation() noexcept; + simdjson_inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + simdjson_inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + + simdjson_inline dom_parser_implementation(const dom_parser_implementation &) noexcept = delete; + simdjson_inline dom_parser_implementation &operator=(const dom_parser_implementation &other) noexcept = delete; +}; // class dom_parser_implementation + +simdjson_inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +simdjson_inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +simdjson_inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +simdjson_inline size_t dom_parser_implementation::capacity() const noexcept { + return _capacity; +} + +simdjson_inline size_t dom_parser_implementation::max_depth() const noexcept { + return _max_depth; +} + +simdjson_warn_unused +inline error_code dom_parser_implementation::allocate(size_t capacity, size_t max_depth) noexcept { + if (this->max_depth() != max_depth) { + error_code err = set_max_depth(max_depth); + if (err) { return err; } + } + if (_capacity != capacity) { + error_code err = set_capacity(capacity); + if (err) { return err; } + } + return SUCCESS; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/internal/dom_parser_implementation.h */ + +#include + +namespace simdjson { + +/** + * Validate the UTF-8 string. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if the string is valid UTF-8. + */ +simdjson_warn_unused bool validate_utf8(const char * buf, size_t len) noexcept; +/** + * Validate the UTF-8 string. + * + * @param sv the string_view to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string_view sv) noexcept { + return validate_utf8(sv.data(), sv.size()); +} + +/** + * Validate the UTF-8 string. + * + * @param p the string to validate. + * @return true if the string is valid UTF-8. + */ +simdjson_inline simdjson_warn_unused bool validate_utf8(const std::string& s) noexcept { + return validate_utf8(s.data(), s.size()); +} + +/** + * An implementation of simdjson for a particular CPU architecture. + * + * Also used to maintain the currently active implementation. The active implementation is + * automatically initialized on first use to the most advanced implementation supported by the host. + */ +class implementation { +public: + + /** + * The name of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the name of the implementation, e.g. "haswell", "westmere", "arm64". + */ + virtual const std::string &name() const { return _name; } + + /** + * The description of this implementation. + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @return the description of the implementation, e.g. "Intel/AMD AVX2", "Intel/AMD SSE4.2", "ARM NEON". 
+ */ + virtual const std::string &description() const { return _description; } + + /** + * The instruction sets this implementation is compiled against + * and the current CPU match. This function may poll the current CPU/system + * and should therefore not be called too often if performance is a concern. + * + * @return true if the implementation can be safely used on the current system (determined at runtime). + */ + bool supported_by_runtime_system() const; + + /** + * @private For internal implementation use + * + * The instruction sets this implementation is compiled against. + * + * @return a mask of all required `internal::instruction_set::` values. + */ + virtual uint32_t required_instruction_sets() const { return _required_instruction_sets; } + + /** + * @private For internal implementation use + * + * const implementation *impl = simdjson::get_active_implementation(); + * cout << "simdjson is optimized for " << impl->name() << "(" << impl->description() << ")" << endl; + * + * @param capacity The largest document that will be passed to the parser. + * @param max_depth The maximum JSON object/array nesting this parser is expected to handle. + * @param dst The place to put the resulting parser implementation. + * @return the error code, or SUCCESS if there was no error. + */ + virtual error_code create_dom_parser_implementation( + size_t capacity, + size_t max_depth, + std::unique_ptr &dst + ) const noexcept = 0; + + /** + * @private For internal implementation use + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * + * Overridden by each implementation. + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len + SIMDJSON_PADDING bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ + simdjson_warn_unused virtual error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept = 0; + + + /** + * Validate the UTF-8 string. + * + * Overridden by each implementation. + * + * @param buf the string to validate. + * @param len the length of the string in bytes. + * @return true if and only if the string is valid UTF-8. + */ + simdjson_warn_unused virtual bool validate_utf8(const char *buf, size_t len) const noexcept = 0; + +protected: + /** @private Construct an implementation with the given name and description. For subclasses. */ + simdjson_inline implementation( + std::string_view name, + std::string_view description, + uint32_t required_instruction_sets + ) : + _name(name), + _description(description), + _required_instruction_sets(required_instruction_sets) + { + } + virtual ~implementation()=default; + +private: + /** + * The name of this implementation. + */ + const std::string _name; + + /** + * The description of this implementation. + */ + const std::string _description; + + /** + * Instruction sets required for this implementation. + */ + const uint32_t _required_instruction_sets; +}; + +/** @private */ +namespace internal { + +/** + * The list of available implementations compiled into simdjson. 
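+ *
+ * For example, the compiled-in implementations can be listed like this (an illustrative
+ * sketch using the documented accessors, not part of the upstream examples):
+ *
+ *   for (const simdjson::implementation *impl : simdjson::get_available_implementations()) {
+ *     std::cout << impl->name() << ": " << impl->description() << std::endl;
+ *   }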
+ */ +class available_implementation_list { +public: + /** Get the list of available implementations compiled into simdjson */ + simdjson_inline available_implementation_list() {} + /** Number of implementations */ + size_t size() const noexcept; + /** STL const begin() iterator */ + const implementation * const *begin() const noexcept; + /** STL const end() iterator */ + const implementation * const *end() const noexcept; + + /** + * Get the implementation with the given name. + * + * Case sensitive. + * + * const implementation *impl = simdjson::get_available_implementations()["westmere"]; + * if (!impl) { exit(1); } + * if (!imp->supported_by_runtime_system()) { exit(1); } + * simdjson::get_active_implementation() = impl; + * + * @param name the implementation to find, e.g. "westmere", "haswell", "arm64" + * @return the implementation, or nullptr if the parse failed. + */ + const implementation * operator[](const std::string_view &name) const noexcept { + for (const implementation * impl : *this) { + if (impl->name() == name) { return impl; } + } + return nullptr; + } + + /** + * Detect the most advanced implementation supported by the current host. + * + * This is used to initialize the implementation on startup. + * + * const implementation *impl = simdjson::available_implementation::detect_best_supported(); + * simdjson::get_active_implementation() = impl; + * + * @return the most advanced supported implementation for the current host, or an + * implementation that returns UNSUPPORTED_ARCHITECTURE if there is no supported + * implementation. Will never return nullptr. + */ + const implementation *detect_best_supported() const noexcept; +}; + +} // namespace internal + +/** + * The list of available implementations compiled into simdjson. + */ +extern SIMDJSON_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations(); + +/** + * The active implementation. + * + * Automatically initialized on first use to the most advanced implementation supported by this hardware. + */ +extern SIMDJSON_DLLIMPORTEXPORT internal::atomic_ptr& get_active_implementation(); + +} // namespace simdjson + +#endif // SIMDJSON_IMPLEMENTATION_H +/* end file simdjson/implementation.h */ +/* including simdjson/minify.h: #include "simdjson/minify.h" */ +/* begin file simdjson/minify.h */ +#ifndef SIMDJSON_MINIFY_H +#define SIMDJSON_MINIFY_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/padded_string.h: #include "simdjson/padded_string.h" */ +/* begin file simdjson/padded_string.h */ +#ifndef SIMDJSON_PADDED_STRING_H +#define SIMDJSON_PADDED_STRING_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/error.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include +#include +#include +#include + +namespace simdjson { + +class padded_string_view; + +/** + * String with extra allocation for ease of use with parser::parse() + * + * This is a move-only class, it cannot be copied. + */ +struct padded_string final { + + /** + * Create a new, empty padded string. + */ + explicit inline padded_string() noexcept; + /** + * Create a new padded string buffer. + * + * @param length the size of the string. + */ + explicit inline padded_string(size_t length) noexcept; + /** + * Create a new padded string by copying the given input. 
+ * + * @param data the buffer to copy + * @param length the number of bytes to copy + */ + explicit inline padded_string(const char *data, size_t length) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param str_ the string to copy + */ + inline padded_string(const std::string & str_ ) noexcept; + /** + * Create a new padded string by copying the given input. + * + * @param sv_ the string to copy + */ + inline padded_string(std::string_view sv_) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string(padded_string &&o) noexcept; + /** + * Move one padded string into another. + * + * The original padded string will be reduced to zero capacity. + * + * @param o the string to move. + */ + inline padded_string &operator=(padded_string &&o) noexcept; + inline void swap(padded_string &o) noexcept; + ~padded_string() noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t size() const noexcept; + + /** + * The length of the string. + * + * Does not include padding. + */ + size_t length() const noexcept; + + /** + * The string data. + **/ + const char *data() const noexcept; + const uint8_t *u8data() const noexcept { return static_cast(static_cast(data_ptr));} + + /** + * The string data. + **/ + char *data() noexcept; + + /** + * Create a std::string_view with the same content. + */ + operator std::string_view() const; + + /** + * Create a padded_string_view with the same content. + */ + operator padded_string_view() const noexcept; + + /** + * Load this padded string from a file. + * + * @return IO_ERROR on error. Be mindful that on some 32-bit systems, + * the file size might be limited to 2 GB. + * + * @param path the path to the file. + **/ + inline static simdjson_result load(std::string_view path) noexcept; + +private: + padded_string &operator=(const padded_string &o) = delete; + padded_string(const padded_string &o) = delete; + + size_t viable_size{0}; + char *data_ptr{nullptr}; + +}; // padded_string + +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, const padded_string& s) { return out << s.data(); } + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string instance. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + +// This is deliberately outside of simdjson so that people get it without having to use the namespace +inline simdjson::padded_string operator "" _padded(const char *str, size_t len); + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. 
+// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). +inline char *allocate_padded_buffer(size_t length) noexcept; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_H +/* end file simdjson/padded_string.h */ +#include +#include +#include + +namespace simdjson { + +/** + * + * Minify the input string assuming that it represents a JSON string, does not parse or validate. + * This function is much faster than parsing a JSON string and then writing a minified version of it. + * However, it does not validate the input. It will merely return an error in simple cases (e.g., if + * there is a string that was never terminated). + * + * + * @param buf the json document to minify. + * @param len the length of the json document. + * @param dst the buffer to write the minified document to. *MUST* be allocated up to len bytes. + * @param dst_len the number of bytes written. Output only. + * @return the error code, or SUCCESS if there was no error. + */ +simdjson_warn_unused error_code minify(const char *buf, size_t len, char *dst, size_t &dst_len) noexcept; + +} // namespace simdjson + +#endif // SIMDJSON_MINIFY_H +/* end file simdjson/minify.h */ +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* including simdjson/padded_string-inl.h: #include "simdjson/padded_string-inl.h" */ +/* begin file simdjson/padded_string-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_INL_H +#define SIMDJSON_PADDED_STRING_INL_H + +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* including simdjson/padded_string_view.h: #include "simdjson/padded_string_view.h" */ +/* begin file simdjson/padded_string_view.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_H +#define SIMDJSON_PADDED_STRING_VIEW_H + +/* skipped duplicate #include "simdjson/portability.h" */ +/* skipped duplicate #include "simdjson/base.h" // for SIMDJSON_PADDING */ +/* skipped duplicate #include "simdjson/error.h" */ + +#include +#include +#include +#include + +namespace simdjson { + +/** + * User-provided string that promises it has extra padded bytes at the end for use with parser::parse(). + */ +class padded_string_view : public std::string_view { +private: + size_t _capacity; + +public: + /** Create an empty padded_string_view. */ + inline padded_string_view() noexcept = default; + + /** + * Promise the given buffer has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param len The length of the string (not including padding). + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(const char* s, size_t len, size_t capacity) noexcept; + /** overload explicit inline padded_string_view(const char* s, size_t len) noexcept */ + explicit inline padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept; + + /** + * Promise the given string has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * The capacity of the string will be used to determine its padding. + * + * @param s The string. + */ + explicit inline padded_string_view(const std::string &s) noexcept; + + /** + * Promise the given string_view has at least SIMDJSON_PADDING extra bytes allocated to it. + * + * @param s The string. + * @param capacity The allocated length of the string, including padding. + */ + explicit inline padded_string_view(std::string_view s, size_t capacity) noexcept; + + /** The number of allocated bytes. 
*/ + inline size_t capacity() const noexcept; + + /** The amount of padding on the string (capacity() - length()) */ + inline size_t padding() const noexcept; + +}; // padded_string_view + +#if SIMDJSON_EXCEPTIONS +/** + * Send padded_string instance to an output stream. + * + * @param out The output stream. + * @param s The padded_string_view. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). + */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false); +#endif + +} // namespace simdjson + +#endif // SIMDJSON_PADDED_STRING_VIEW_H +/* end file simdjson/padded_string_view.h */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* including simdjson/padded_string_view-inl.h: #include "simdjson/padded_string_view-inl.h" */ +/* begin file simdjson/padded_string_view-inl.h */ +#ifndef SIMDJSON_PADDED_STRING_VIEW_INL_H +#define SIMDJSON_PADDED_STRING_VIEW_INL_H + +/* skipped duplicate #include "simdjson/padded_string_view.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ + +namespace simdjson { + +inline padded_string_view::padded_string_view(const char* s, size_t len, size_t capacity) noexcept + : std::string_view(s, len), _capacity(capacity) +{ +} + +inline padded_string_view::padded_string_view(const uint8_t* s, size_t len, size_t capacity) noexcept + : padded_string_view(reinterpret_cast(s), len, capacity) +{ +} + +inline padded_string_view::padded_string_view(const std::string &s) noexcept + : std::string_view(s), _capacity(s.capacity()) +{ +} + +inline padded_string_view::padded_string_view(std::string_view s, size_t capacity) noexcept + : std::string_view(s), _capacity(capacity) +{ +} + +inline size_t padded_string_view::capacity() const noexcept { return _capacity; } + +inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &s) noexcept(false) { return out << s.value(); } +#endif + +} // namespace simdjson + + +#endif // SIMDJSON_PADDED_STRING_VIEW_INL_H +/* end file simdjson/padded_string_view-inl.h */ + +#include + +namespace simdjson { +namespace internal { + +// The allocate_padded_buffer function is a low-level function to allocate memory +// with padding so we can read past the "length" bytes safely. It is used by +// the padded_string class automatically. It returns nullptr in case +// of error: the caller should check for a null pointer. +// The length parameter is the maximum size in bytes of the string. +// The caller is responsible to free the memory (e.g., delete[] (...)). 
+inline char *allocate_padded_buffer(size_t length) noexcept { + const size_t totalpaddedlength = length + SIMDJSON_PADDING; + if(totalpaddedlength(1UL<<20)) { + return nullptr; + } +#endif + + char *padded_buffer = new (std::nothrow) char[totalpaddedlength]; + if (padded_buffer == nullptr) { + return nullptr; + } + // We write nulls in the padded region to avoid having uninitialized + // content which may trigger warning for some sanitizers + std::memset(padded_buffer + length, 0, totalpaddedlength - length); + return padded_buffer; +} // allocate_padded_buffer() + +} // namespace internal + + +inline padded_string::padded_string() noexcept = default; +inline padded_string::padded_string(size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { +} +inline padded_string::padded_string(const char *data, size_t length) noexcept + : viable_size(length), data_ptr(internal::allocate_padded_buffer(length)) { + if ((data != nullptr) && (data_ptr != nullptr)) { + std::memcpy(data_ptr, data, length); + } +} +// note: do not pass std::string arguments by value +inline padded_string::padded_string(const std::string & str_ ) noexcept + : viable_size(str_.size()), data_ptr(internal::allocate_padded_buffer(str_.size())) { + if (data_ptr != nullptr) { + std::memcpy(data_ptr, str_.data(), str_.size()); + } +} +// note: do pass std::string_view arguments by value +inline padded_string::padded_string(std::string_view sv_) noexcept + : viable_size(sv_.size()), data_ptr(internal::allocate_padded_buffer(sv_.size())) { + if(simdjson_unlikely(!data_ptr)) { + //allocation failed or zero size + viable_size = 0; + return; + } + if (sv_.size()) { + std::memcpy(data_ptr, sv_.data(), sv_.size()); + } +} +inline padded_string::padded_string(padded_string &&o) noexcept + : viable_size(o.viable_size), data_ptr(o.data_ptr) { + o.data_ptr = nullptr; // we take ownership +} + +inline padded_string &padded_string::operator=(padded_string &&o) noexcept { + delete[] data_ptr; + data_ptr = o.data_ptr; + viable_size = o.viable_size; + o.data_ptr = nullptr; // we take ownership + o.viable_size = 0; + return *this; +} + +inline void padded_string::swap(padded_string &o) noexcept { + size_t tmp_viable_size = viable_size; + char *tmp_data_ptr = data_ptr; + viable_size = o.viable_size; + data_ptr = o.data_ptr; + o.data_ptr = tmp_data_ptr; + o.viable_size = tmp_viable_size; +} + +inline padded_string::~padded_string() noexcept { + delete[] data_ptr; +} + +inline size_t padded_string::size() const noexcept { return viable_size; } + +inline size_t padded_string::length() const noexcept { return viable_size; } + +inline const char *padded_string::data() const noexcept { return data_ptr; } + +inline char *padded_string::data() noexcept { return data_ptr; } + +inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } + +inline padded_string::operator padded_string_view() const noexcept { + return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); +} + +inline simdjson_result padded_string::load(std::string_view filename) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(filename.data(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, 
SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 llen = _ftelli64(fp); + if(llen == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long llen = std::ftell(fp); + if((llen < 0) || (llen == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Allocate the padded_string + size_t len = static_cast(llen); + padded_string s(len); + if (s.data() == nullptr) { + std::fclose(fp); + return MEMALLOC; + } + + // Read the padded_string + std::rewind(fp); + size_t bytes_read = std::fread(s.data(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != len) { + return IO_ERROR; + } + + return s; +} + +} // namespace simdjson + +inline simdjson::padded_string operator "" _padded(const char *str, size_t len) { + return simdjson::padded_string(str, len); +} + +#endif // SIMDJSON_PADDED_STRING_INL_H +/* end file simdjson/padded_string-inl.h */ +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/padded_string_view-inl.h" */ + +/* including simdjson/dom.h: #include "simdjson/dom.h" */ +/* begin file simdjson/dom.h */ +#ifndef SIMDJSON_DOM_H +#define SIMDJSON_DOM_H + +/* including simdjson/dom/base.h: #include "simdjson/dom/base.h" */ +/* begin file simdjson/dom/base.h */ +#ifndef SIMDJSON_DOM_BASE_H +#define SIMDJSON_DOM_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { + +/** + * @brief A DOM API on top of the simdjson parser. + */ +namespace dom { + +/** The default batch size for parser.parse_many() and parser.load_many() */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * It is wasteful to allocate memory for tiny documents (e.g., 4 bytes). + */ +static constexpr size_t MINIMAL_DOCUMENT_CAPACITY = 32; + +class array; +class document; +class document_stream; +class element; +class key_value_pair; +class object; +class parser; + +#ifdef SIMDJSON_THREADS_ENABLED +struct stage1_worker; +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +namespace internal { + +template +class string_builder; +class tape_ref; + +} // namespace internal + +} // namespace simdjson + +#endif // SIMDJSON_DOM_BASE_H +/* end file simdjson/dom/base.h */ +/* including simdjson/dom/array.h: #include "simdjson/dom/array.h" */ +/* begin file simdjson/dom/array.h */ +#ifndef SIMDJSON_DOM_ARRAY_H +#define SIMDJSON_DOM_ARRAY_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/internal/tape_ref.h: #include "simdjson/internal/tape_ref.h" */ +/* begin file simdjson/internal/tape_ref.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_REF_H +#define SIMDJSON_INTERNAL_TAPE_REF_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace dom { +class document; +} // namespace dom + +namespace internal { + +/** + * A reference to an element on the tape. Internal only. 
+ */ +class tape_ref { +public: + simdjson_inline tape_ref() noexcept; + simdjson_inline tape_ref(const dom::document *doc, size_t json_index) noexcept; + inline size_t after_element() const noexcept; + simdjson_inline tape_type tape_ref_type() const noexcept; + simdjson_inline uint64_t tape_value() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_false() const noexcept; + simdjson_inline bool is_true() const noexcept; + simdjson_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null. + simdjson_inline uint32_t matching_brace_index() const noexcept; + simdjson_inline uint32_t scope_count() const noexcept; + template + simdjson_inline T next_tape_value() const noexcept; + simdjson_inline uint32_t get_string_length() const noexcept; + simdjson_inline const char * get_c_str() const noexcept; + inline std::string_view get_string_view() const noexcept; + simdjson_inline bool is_document_root() const noexcept; + simdjson_inline bool usable() const noexcept; + + /** The document this element references. */ + const dom::document *doc; + + /** The index of this element on `doc.tape[]` */ + size_t json_index; +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_REF_H +/* end file simdjson/internal/tape_ref.h */ + +namespace simdjson { +namespace dom { + +/** + * JSON array. + */ +class array { +public: + /** Create a new, invalid array */ + simdjson_inline array() noexcept; + + class iterator { + public: + using value_type = element; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual value + */ + inline value_type operator*() const noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator& operator++() noexcept; + /** + * Get the next value. + * + * Part of the std::iterator interface. + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class array; + }; + + /** + * Return the first array element. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last array element. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the array (number of immediate children). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the total number of slots used by this array on the tape. + * + * Note that this is not the same thing as `size()`, which reports the + * number of actual elements within an array (not counting its children). 
+ * + * Since an element can use 1 or 2 slots on the tape, you can only use this + * to figure out the total size of an array (including its children, + * recursively) if you know its structure ahead of time. + **/ + inline size_t number_of_slots() const noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded); + * a.at_pointer("/0/foo/a/1") == 20 + * a.at_pointer("0")["foo"]["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity and + * is equivalent to the following: + * + * size_t i=0; + * for (auto element : *this) { + * if (i == index) { return element; } + * i++; + * } + * return INDEX_OUT_OF_BOUNDS; + * + * Avoid calling the at() function repeatedly. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + +private: + simdjson_inline array(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::array value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at(size_t index) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::array::iterator begin() const noexcept(false); + inline dom::array::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + + + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_ARRAY_H +/* end file simdjson/dom/array.h */ +/* including simdjson/dom/document_stream.h: #include "simdjson/dom/document_stream.h" */ +/* begin file simdjson/dom/document_stream.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_H +#define SIMDJSON_DOCUMENT_STREAM_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/dom/parser.h: #include "simdjson/dom/parser.h" */ +/* begin file simdjson/dom/parser.h */ +#ifndef SIMDJSON_DOM_PARSER_H +#define SIMDJSON_DOM_PARSER_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* including simdjson/dom/document.h: #include "simdjson/dom/document.h" */ +/* begin file simdjson/dom/document.h */ +#ifndef SIMDJSON_DOM_DOCUMENT_H +#define SIMDJSON_DOM_DOCUMENT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ + +#include + +namespace simdjson { +namespace dom { + +/** + * A parsed JSON document. + * + * This class cannot be copied, only moved, to avoid unintended allocations. + */ +class document { +public: + /** + * Create a document container with zero capacity. + * + * The parser will allocate capacity as needed. + */ + document() noexcept = default; + ~document() noexcept = default; + + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed and it is invalidated. + */ + document(document &&other) noexcept = default; + /** @private */ + document(const document &) = delete; // Disallow copying + /** + * Take another document's buffers. + * + * @param other The document to take. Its capacity is zeroed. + */ + document &operator=(document &&other) noexcept = default; + /** @private */ + document &operator=(const document &) = delete; // Disallow copying + + /** + * Get the root element of this document as a JSON array. + */ + element root() const noexcept; + + /** + * @private Dump the raw tape for debugging. + * + * @param os the stream to output to. + * @return false if the tape is likely wrong (e.g., you did not parse a valid JSON). + */ + bool dump_raw_tape(std::ostream &os) const noexcept; + + /** @private Structural values. */ + std::unique_ptr tape{}; + + /** @private String values. + * + * Should be at least byte_capacity. + */ + std::unique_ptr string_buf{}; + /** @private Allocate memory to support + * input JSON documents of up to len bytes. + * + * When calling this function, you lose + * all the data. 
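+   *
+   * A minimal sketch of the intended use (illustrative only; the 1000000-byte figure is
+   * just an example):
+   *
+   *   dom::document doc;
+   *   simdjson::error_code err = doc.allocate(1000000); // room for documents up to ~1 MB
+   *   if (err) { std::cerr << err << std::endl; }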
+   *
+   * The memory allocation is strict: you can use this function to increase
+   * or lower the amount of allocated memory. Passing zero clears the memory.
+   */
+  error_code allocate(size_t len) noexcept;
+  /** @private Capacity in bytes, in terms
+   * of how many bytes of input JSON we can
+   * support.
+   */
+  size_t capacity() const noexcept;
+
+
+private:
+  size_t allocated_capacity{0};
+  friend class parser;
+}; // class document
+
+} // namespace dom
+} // namespace simdjson
+
+#endif // SIMDJSON_DOM_DOCUMENT_H
+/* end file simdjson/dom/document.h */
+
+namespace simdjson {
+
+namespace dom {
+
+/**
+ * A persistent document parser.
+ *
+ * The parser is designed to be reused, holding the internal buffers necessary to do parsing,
+ * as well as memory for a single document. The parsed document is overwritten on each parse.
+ *
+ * This class cannot be copied, only moved, to avoid unintended allocations.
+ *
+ * @note Moving a parser instance may invalidate "dom::element" instances. If you need to
+ * preserve both the "dom::element" instances and the parser, consider wrapping the parser
+ * instance in a std::unique_ptr instance:
+ *
+ *   std::unique_ptr<dom::parser> parser(new dom::parser{});
+ *   auto error = parser->load(f).get(root);
+ *
+ * You can then move the std::unique_ptr safely.
+ *
+ * @note This is not thread safe: one parser cannot produce two documents at the same time!
+ */
+class parser {
+public:
+  /**
+   * Create a JSON parser.
+   *
+   * The new parser will have zero capacity.
+   *
+   * @param max_capacity The maximum document length the parser can automatically handle. The parser
+   *    will allocate more capacity on an as needed basis (when it sees documents too big to handle)
+   *    up to this amount. The parser still starts with zero capacity no matter what this number is:
+   *    to allocate an initial capacity, call allocate() after constructing the parser.
+   *    Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
+   */
+  simdjson_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
+  /**
+   * Take another parser's buffers and state.
+   *
+   * @param other The parser to take. Its capacity is zeroed.
+   */
+  simdjson_inline parser(parser &&other) noexcept;
+  parser(const parser &) = delete; ///< @private Disallow copying
+  /**
+   * Take another parser's buffers and state.
+   *
+   * @param other The parser to take. Its capacity is zeroed.
+   */
+  simdjson_inline parser &operator=(parser &&other) noexcept;
+  parser &operator=(const parser &) = delete; ///< @private Disallow copying
+
+  /** Deallocate the JSON parser. */
+  ~parser()=default;
+
+  /**
+   * Load a JSON document from a file and return a reference to it.
+   *
+   *   dom::parser parser;
+   *   const element doc = parser.load("jsonexamples/twitter.json");
+   *
+   * The function is eager: the file's content is loaded in memory inside the parser instance
+   * and immediately parsed. The file can be deleted after the `parser.load` call.
+   *
+   * ### IMPORTANT: Document Lifetime
+   *
+   * The JSON document still lives in the parser: this is the most efficient way to parse JSON
+   * documents because it reuses the same buffers, but you *must* use the document before you
+   * destroy the parser or call parse() again.
+   *
+   * Moving the parser instance is safe, but it invalidates the element instances. You may store
+   * the parser instance without moving it by wrapping it inside a `unique_ptr` instance like
+   * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
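+   *
+   * Without exceptions, the same load can be checked explicitly (a sketch following the
+   * error-code pattern documented below; "twitter.json" is a placeholder path):
+   *
+   *   dom::parser parser;
+   *   dom::element doc;
+   *   auto error = parser.load("twitter.json").get(doc);
+   *   if (error) { std::cerr << error << std::endl; }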
+   *
+   * ### Parser Capacity
+   *
+   * If the parser's current capacity is less than the file length, it will allocate enough capacity
+   * to handle it (up to max_capacity).
+   *
+   * @param path The path to load.
+   * @return The document, or an error:
+   *         - IO_ERROR if there was an error opening or reading the file.
+   *           Be mindful that on some 32-bit systems,
+   *           the file size might be limited to 2 GB.
+   *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
+   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
+   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
+   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+   */
+  inline simdjson_result<element> load(const std::string &path) & noexcept;
+  inline simdjson_result<element> load(const std::string &path) && = delete;
+  /**
+   * Parse a JSON document and return a temporary reference to it.
+   *
+   *   dom::parser parser;
+   *   element doc_root = parser.parse(buf, len);
+   *
+   * The function eagerly parses the input: the input can be modified and discarded after
+   * the `parser.parse(buf, len)` call has completed.
+   *
+   * ### IMPORTANT: Document Lifetime
+   *
+   * The JSON document still lives in the parser: this is the most efficient way to parse JSON
+   * documents because it reuses the same buffers, but you *must* use the document before you
+   * destroy the parser or call parse() again.
+   *
+   * Moving the parser instance is safe, but it invalidates the element instances. You may store
+   * the parser instance without moving it by wrapping it inside a `unique_ptr` instance like
+   * so: `std::unique_ptr<dom::parser> parser(new dom::parser{});`.
+   *
+   * ### REQUIRED: Buffer Padding
+   *
+   * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
+   * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
+   * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
+   * SIMDJSON_PADDING bytes to avoid runtime warnings.
+   *
+   * If realloc_if_needed is true (the default), it is assumed that the buffer does *not* have enough padding,
+   * and it is copied into an enlarged temporary buffer before parsing. Thus the following is safe:
+   *
+   *   const char *json = R"({"key":"value"})";
+   *   const size_t json_len = std::strlen(json);
+   *   simdjson::dom::parser parser;
+   *   simdjson::dom::element element = parser.parse(json, json_len);
+   *
+   * If you set realloc_if_needed to false (e.g., parser.parse(json, json_len, false)),
+   * you must provide a buffer with at least SIMDJSON_PADDING extra bytes at the end.
+   * The benefit of setting realloc_if_needed to false is that you avoid a temporary
+   * memory allocation and a copy.
+   *
+   * The padded bytes may be read. It is not important how you initialize
+   * these bytes though we recommend a sensible default like null character values or spaces.
+   * For example, the following low-level code is safe:
+   *
+   *   const char *json = R"({"key":"value"})";
+   *   const size_t json_len = std::strlen(json);
+   *   std::unique_ptr<char[]> padded_json_copy{new char[json_len + SIMDJSON_PADDING]};
+   *   std::memcpy(padded_json_copy.get(), json, json_len);
+   *   std::memset(padded_json_copy.get() + json_len, '\0', SIMDJSON_PADDING);
+   *   simdjson::dom::parser parser;
+   *   simdjson::dom::element element = parser.parse(padded_json_copy.get(), json_len, false);
+   *
+   * ### Parser Capacity
+   *
+   * If the parser's current capacity is less than len, it will allocate enough capacity
+   * to handle it (up to max_capacity).
+   *
+   * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
+   *            realloc_if_needed is true.
+   * @param len The length of the JSON.
+   * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
+   * @return An element pointing at the root of the document, or an error:
+   *         - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
+   *           and memory allocation fails.
+   *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
+   *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
+   *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+   */
+  inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
+  inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
+  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+  simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
+  simdjson_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
+  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+  simdjson_inline simdjson_result<element> parse(const std::string &s) & noexcept;
+  simdjson_inline simdjson_result<element> parse(const std::string &s) && =delete;
+  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+  simdjson_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
+  simdjson_inline simdjson_result<element> parse(const padded_string &s) && =delete;
+  /** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+  simdjson_inline simdjson_result<element> parse(const padded_string_view &v) & noexcept;
+  simdjson_inline simdjson_result<element> parse(const padded_string_view &v) && =delete;
+
+  /** @private We do not want to allow implicit conversion from C string to std::string. */
+  simdjson_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
+
+  /**
+   * Parse a JSON document into a provided document instance and return a temporary reference to it.
+   * It is similar to the function `parse` except that instead of parsing into the internal
+   * `document` instance associated with the parser, it allows the user to provide a document
+   * instance.
+   *
+   *   dom::parser parser;
+   *   dom::document doc;
+   *   element doc_root = parser.parse_into_document(doc, buf, len);
+   *
+   * The function eagerly parses the input: the input can be modified and discarded after
+   * the `parser.parse(buf, len)` call has completed.
+   *
+   * ### IMPORTANT: Document Lifetime
+   *
+   * After the call to parse_into_document, the parser is no longer needed.
+ *
+ * The JSON document lives in the document instance: you must keep the document
+ * instance alive while you navigate through it (i.e., while you use the returned value from
+ * parse_into_document). You are encouraged to reuse the document instance
+ * many times with new data to avoid reallocations:
+ *
+ *   dom::document doc;
+ *   element doc_root1 = parser.parse_into_document(doc, buf1, len);
+ *   //... doc_root1 is a pointer inside doc
+ *   element doc_root2 = parser.parse_into_document(doc, buf2, len);
+ *   //... doc_root2 is a pointer inside doc
+ *   // at this point doc_root1 is no longer safe
+ *
+ * Moving the document instance is safe, but it invalidates the element instances. After
+ * moving a document, you can recover safe access to the document root with its `root()` method.
+ *
+ * @param doc The document instance where the parsed data will be stored (on success).
+ * @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
+ *            realloc_if_needed is true.
+ * @param len The length of the JSON.
+ * @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
+ * @return An element pointing at the root of the document, or an error:
+ *         - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
+ *           and memory allocation fails.
+ *         - CAPACITY if the parser does not have enough capacity and len > max_capacity.
+ *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
+ *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */
+ inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
+ inline simdjson_result<element> parse_into_document(document& doc, const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
+ /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
+ /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) & noexcept;
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const std::string &s) && =delete;
+ /** @overload parse_into_document(const uint8_t *buf, size_t len, bool realloc_if_needed) */
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) & noexcept;
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const padded_string &s) && =delete;
+
+ /** @private We do not want to allow implicit conversion from C string to std::string. */
+ simdjson_inline simdjson_result<element> parse_into_document(document& doc, const char *buf) noexcept = delete;
+
+ /**
+ * Load a file containing many JSON documents.
+ *
+ *   dom::parser parser;
+ *   for (const element doc : parser.load_many(path)) {
+ *     cout << std::string(doc["title"]) << endl;
+ *   }
+ *
+ * The file is loaded in memory and can be safely deleted after the `parser.load_many(path)`
+ * function has returned. The memory is held by the `parser` instance.
+ *
+ * The function is lazy: it may be that no more than one JSON document at a time is parsed.
+ * And, possibly, no document may have been parsed when the `parser.load_many(path)` function
+ * returned.
+ *
+ * ### Format
+ *
+ * The file must contain a series of one or more JSON documents, concatenated into a single
+ * buffer, separated by whitespace. It effectively parses until it has a fully valid document,
+ * then starts parsing the next document at that point. (It does this with more parallelism and
+ * lookahead than you might think, though.)
+ *
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
+ * arrays or objects) MUST be separated with whitespace.
+ *
+ * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
+ * Setting batch_size to an excessively large or excessively small value may negatively impact
+ * performance.
+ *
+ * ### Error Handling
+ *
+ * All errors are returned during iteration: if there is a global error such as memory allocation,
+ * it will be yielded as the first result. Iteration always stops after the first error.
+ *
+ * As with all other simdjson methods, non-exception error handling is readily available through
+ * the same interface, requiring you to check the error before using the document:
+ *
+ *   dom::parser parser;
+ *   dom::document_stream docs;
+ *   auto error = parser.load_many(path).get(docs);
+ *   if (error) { cerr << error << endl; exit(1); }
+ *   for (auto doc : docs) {
+ *     std::string_view title;
+ *     if ((error = doc["title"].get(title))) { cerr << error << endl; exit(1); }
+ *     cout << title << endl;
+ *   }
+ *
+ * ### Threads
+ *
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
+ * hood to do some lookahead.
+ *
+ * ### Parser Capacity
+ *
+ * If the parser's current capacity is less than batch_size, it will allocate enough capacity
+ * to handle it (up to max_capacity).
+ *
+ * @param path File name pointing at the concatenated JSON to parse.
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
+ *                   spot is cache-related: small enough to fit in cache, yet big enough to
+ *                   parse as many documents as possible in one tight loop.
+ *                   Defaults to 1MB (as simdjson::dom::DEFAULT_BATCH_SIZE), which has been a reasonable sweet
+ *                   spot in our tests.
+ *                   If you set the batch_size to a value smaller than simdjson::dom::MINIMAL_BATCH_SIZE
+ *                   (currently 32B), it will be replaced by simdjson::dom::MINIMAL_BATCH_SIZE.
+ * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
+ *         - IO_ERROR if there was an error opening or reading the file.
+ *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
+ *         - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
+ *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
+ *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
+ */
+ inline simdjson_result<document_stream> load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept;
+
+ /**
+ * Parse a buffer containing many JSON documents.
+ * + * dom::parser parser; + * for (element doc : parser.parse_many(buf, len)) { + * cout << std::string(doc["title"]) << endl; + * } + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * And, possibly, no document many have been parsed when the `parser.load_many(path)` function + * returned. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. In particular, the following is unsafe and will not compile: + * + * auto docs = parser.parse_many("[\"temporary data\"]"_padded); + * // here the string "[\"temporary data\"]" may no longer exist in memory + * // the parser instance may not have even accessed the input yet + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * The following is safe: + * + * auto json = "[\"temporary data\"]"_padded; + * auto docs = parser.parse_many(json); + * for (element doc : docs) { + * cout << std::string(doc["title"]) << endl; + * } + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with whitespace. + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excesively small values may impact negatively the + * performance. + * + * ### Error Handling + * + * All errors are returned during iteration: if there is a global error such as memory allocation, + * it will be yielded as the first result. Iteration always stops after the first error. + * + * As with all other simdjson methods, non-exception error handling is readily available through + * the same interface, requiring you to check the error before using the document: + * + * dom::parser parser; + * dom::document_stream docs; + * auto error = parser.load_many(path).get(docs); + * if (error) { cerr << error << endl; exit(1); } + * for (auto doc : docs) { + * std::string_view title; + * if ((error = doc["title"].get(title)) { cerr << error << endl; exit(1); } + * cout << title << endl; + * } + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes. + * @param len The length of the concatenated JSON. 
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result parse_many(const uint8_t *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const char *buf, size_t len, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const std::string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const std::string &&s, size_t batch_size) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result parse_many(const padded_string &s, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result parse_many(const padded_string &&s, size_t batch_size) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result parse_many(const char *buf, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept = delete; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * @private deprecated because it returns bool instead of error_code, which is our standard for + * failures. Use allocate() instead. + * + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return true if successful, false if allocation failed. + */ + [[deprecated("Use allocate() instead.")]] + simdjson_warn_unused inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + /** + * The largest document this parser can support without reallocating. + * + * @return Current capacity, in bytes. + */ + simdjson_inline size_t capacity() const noexcept; + + /** + * The largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount. + * + * @return Maximum capacity, in bytes. 
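As a small illustration of the allocation knobs described here (the 16 MB figure is arbitrary, chosen only for the sketch), a parser can be pre-sized with allocate() and then queried:

```cpp
#include <iostream>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  dom::parser parser;

  // Reserve room up front for documents up to ~16 MB so later parses avoid reallocation.
  auto error = parser.allocate(16 * 1024 * 1024);
  if (error) { std::cerr << error << std::endl; return 1; }

  std::cout << "capacity:     " << parser.capacity()     << " bytes\n";
  std::cout << "max capacity: " << parser.max_capacity() << " bytes\n";
  std::cout << "max depth:    " << parser.max_depth()    << "\n";
  return 0;
}
```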
+ */ + simdjson_inline size_t max_capacity() const noexcept; + + /** + * The maximum level of nested object and arrays supported by this parser. + * + * @return Maximum depth, in bytes. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Set max_capacity. This is the largest document this parser can automatically support. + * + * The parser may reallocate internal buffers as needed up to this amount as documents are passed + * to it. + * + * Note: To avoid limiting the memory to an absurd value, such as zero or two bytes, + * iff you try to set max_capacity to a value lower than MINIMAL_DOCUMENT_CAPACITY, + * then the maximal capacity is set to MINIMAL_DOCUMENT_CAPACITY. + * + * This call will not allocate or deallocate, even if capacity is currently above max_capacity. + * + * @param max_capacity The new maximum capacity, in bytes. + */ + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + +#ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; +#endif + /** @private Use the new DOM API instead */ + class Iterator; + /** @private Use simdjson_error instead */ + using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error; + + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + + /** @private Use `if (parser.parse(...).error())` instead */ + bool valid{false}; + /** @private Use `parser.parse(...).error()` instead */ + error_code error{UNINITIALIZED}; + + /** @private Use `parser.parse(...).value()` instead */ + document doc{}; + + /** @private returns true if the document parsed was valid */ + [[deprecated("Use the result of parser.parse() instead")]] + inline bool is_valid() const noexcept; + + /** + * @private return an error code corresponding to the last parsing attempt, see + * simdjson.h will return UNINITIALIZED if no parsing was attempted + */ + [[deprecated("Use the result of parser.parse() instead")]] + inline int get_error_code() const noexcept; + + /** @private return the string equivalent of "get_error_code" */ + [[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]] + inline std::string get_error_message() const noexcept; + + /** @private */ + [[deprecated("Use cout << on the result of parser.parse() instead")]] + inline bool print_json(std::ostream &os) const noexcept; + + /** @private Private and deprecated: use `parser.parse(...).doc.dump_raw_tape()` instead */ + inline bool dump_raw_tape(std::ostream &os) const noexcept; + + +private: + /** + * The maximum document length this parser will automatically support. + * + * The parser will not be automatically allocated above this amount. + */ + size_t _max_capacity; + + /** + * The loaded buffer (reused each time load() is called) + */ + std::unique_ptr loaded_bytes; + + /** Capacity of loaded_bytes buffer. */ + size_t _loaded_bytes_capacity{0}; + + // all nodes are stored on the doc.tape using a 64-bit word. + // + // strings, double and ints are stored as + // a 64-bit word with a pointer to the actual value + // + // + // + // for objects or arrays, store [ or { at the beginning and } and ] at the + // end. 
For the openings ([ or {), we annotate them with a reference to the + // location on the doc.tape of the end, and for then closings (} and ]), we + // annotate them with a reference to the location of the opening + // + // + + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * internal document. + */ + inline error_code ensure_capacity(size_t desired_capacity) noexcept; + /** + * Ensure we have enough capacity to handle at least desired_capacity bytes, + * and auto-allocate if not. This also allocates memory if needed in the + * provided document. + */ + inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; + + /** Read the file into loaded_bytes */ + inline simdjson_result read_file(const std::string &path) noexcept; + + friend class parser::Iterator; + friend class document_stream; + + +}; // class parser + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSER_H +/* end file simdjson/dom/parser.h */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, dom::parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + dom::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; +}; +#endif + +/** + * A forward-only stream of documents. + * + * Produced by parser::parse_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * error = parser.parse_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + /** + * Returns the input size in bytes. 
+ */ + inline size_t size_in_bytes() const noexcept; + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.parse_many(json,window); + * for(auto doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + /** + * An iterator through a forward-only stream of documents. + */ + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline reference operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + /** + * @private + * + * Gives a view of the current document. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * std::string_view v = i->source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline std::string_view source() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + friend class document_stream; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
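The streaming interface above pairs naturally with truncated_bytes(); a minimal sketch (the input is invented and deliberately ends with an incomplete document, and the default batch_size is assumed):

```cpp
#include <cstdint>
#include <iostream>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  // Two complete documents followed by a truncated one.
  auto json = R"({"k":1} {"k":2} {"k":)"_padded;

  dom::parser parser;
  dom::document_stream stream;
  auto error = parser.parse_many(json).get(stream);
  if (error) { std::cerr << error << std::endl; return 1; }

  for (auto doc : stream) {
    int64_t k;
    if (doc["k"].get(k) == SUCCESS) { std::cout << "k = " << k << "\n"; }
  }
  // Bytes at the end of the input that were not consumed as a complete document.
  std::cout << "truncated bytes: " << stream.truncated_bytes() << "\n";
  return 0;
}
```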
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + dom::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** + * Pass the next batch through stage 1 and return when finished. + * When threads are enabled, this may wait for the stage 1 thread to finish. + */ + inline void load_batch() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(dom::parser &p, size_t batch_start) noexcept; + + dom::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; +#ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + friend struct stage1_worker; + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + dom::parser stage1_thread_parser{}; +#endif // SIMDJSON_THREADS_ENABLED + + friend class dom::parser; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; + +}; // class document_stream + +} // namespace dom + +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result(dom::document_stream &&value) noexcept; ///< @private + +#if SIMDJSON_EXCEPTIONS + simdjson_inline dom::document_stream::iterator begin() noexcept(false); + simdjson_inline dom::document_stream::iterator end() noexcept(false); +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator begin() noexcept; + [[deprecated("parse_many() and load_many() may return errors. Use document_stream stream; error = parser.parse_many().get(doc); instead.")]] + simdjson_inline dom::document_stream::iterator end() noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS +}; // struct simdjson_result + +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_STREAM_H +/* end file simdjson/dom/document_stream.h */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* including simdjson/dom/element.h: #include "simdjson/dom/element.h" */ +/* begin file simdjson/dom/element.h */ +#ifndef SIMDJSON_DOM_ELEMENT_H +#define SIMDJSON_DOM_ELEMENT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/array.h" */ + +namespace simdjson { +namespace dom { + +/** + * The actual concrete type of a JSON element + * This is the type it is most easily cast to with get<>. + */ +enum class element_type { + ARRAY = '[', ///< dom::array + OBJECT = '{', ///< dom::object + INT64 = 'l', ///< int64_t + UINT64 = 'u', ///< uint64_t: any integer that fits in uint64_t but *not* int64_t + DOUBLE = 'd', ///< double: Any number with a "." or "e" that fits in double. + STRING = '"', ///< std::string_view + BOOL = 't', ///< bool + NULL_VALUE = 'n' ///< null +}; + +/** + * A JSON element. + * + * References an element in a JSON document, representing a JSON null, boolean, string, number, + * array or object. + */ +class element { +public: + /** Create a new, invalid element. */ + simdjson_inline element() noexcept; + + /** The type of this element. */ + simdjson_inline element_type type() const noexcept; + + /** + * Cast this element to an array. + * + * @returns An object that can be used to iterate the array, or: + * INCORRECT_TYPE if the JSON element is not an array. + */ + inline simdjson_result get_array() const noexcept; + /** + * Cast this element to an object. + * + * @returns An object that can be used to look up or iterate the object's fields, or: + * INCORRECT_TYPE if the JSON element is not an object. + */ + inline simdjson_result get_object() const noexcept; + /** + * Cast this element to a null-terminated C string. + * + * The string is guaranteed to be valid UTF-8. + * + * The length of the string is given by get_string_length(). Because JSON strings + * may contain null characters, it may be incorrect to use strlen to determine the + * string length. 
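To make the caveat about embedded null characters concrete, here is a sketch (the sample document is invented) contrasting get_string() with strlen() applied to get_c_str():

```cpp
#include <cstring>
#include <iostream>
#include <string_view>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  dom::parser parser;
  dom::element elem;
  // The string value contains an escaped NUL character.
  auto error = parser.parse(R"({"k":"a\u0000b"})"_padded).get(elem);
  if (error) { std::cerr << error << std::endl; return 1; }

  std::string_view sv;
  if (elem["k"].get(sv)) { return 1; }
  const char *cstr;
  if (elem["k"].get(cstr)) { return 1; }

  std::cout << "get_string length:  " << sv.size() << "\n";          // 3
  std::cout << "strlen(get_c_str): " << std::strlen(cstr) << "\n";   // 1, cut off at the embedded NUL
  return 0;
}
```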
+ * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A pointer to a null-terminated UTF-8 string. This string is stored in the parser and will + * be invalidated the next time it parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_c_str() const noexcept; + /** + * Gives the length in bytes of the string. + * + * It is possible to get a single string_view instance which represents both the string + * content and its length: see get_string(). + * + * @returns A string length in bytes. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string_length() const noexcept; + /** + * Cast this element to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next time it + * parses a document or when it is destroyed. + * Returns INCORRECT_TYPE if the JSON element is not a string. + */ + inline simdjson_result get_string() const noexcept; + /** + * Cast this element to a signed integer. + * + * @returns A signed 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is negative. + */ + inline simdjson_result get_int64() const noexcept; + /** + * Cast this element to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * Returns INCORRECT_TYPE if the JSON element is not an integer, or NUMBER_OUT_OF_RANGE + * if it is too large. + */ + inline simdjson_result get_uint64() const noexcept; + /** + * Cast this element to a double floating-point. + * + * @returns A double value. + * Returns INCORRECT_TYPE if the JSON element is not a number. + */ + inline simdjson_result get_double() const noexcept; + /** + * Cast this element to a bool. + * + * @returns A bool value. + * Returns INCORRECT_TYPE if the JSON element is not a boolean. + */ + inline simdjson_result get_bool() const noexcept; + + /** + * Whether this element is a json array. + * + * Equivalent to is(). + */ + inline bool is_array() const noexcept; + /** + * Whether this element is a json object. + * + * Equivalent to is(). + */ + inline bool is_object() const noexcept; + /** + * Whether this element is a json string. + * + * Equivalent to is() or is(). + */ + inline bool is_string() const noexcept; + /** + * Whether this element is a json number that fits in a signed 64-bit integer. + * + * Equivalent to is(). + */ + inline bool is_int64() const noexcept; + /** + * Whether this element is a json number that fits in an unsigned 64-bit integer. + * + * Equivalent to is(). + */ + inline bool is_uint64() const noexcept; + /** + * Whether this element is a json number that fits in a double. + * + * Equivalent to is(). + */ + inline bool is_double() const noexcept; + + /** + * Whether this element is a json number. + * + * Both integers and floating points will return true. + */ + inline bool is_number() const noexcept; + + /** + * Whether this element is a json `true` or `false`. + * + * Equivalent to is(). + */ + inline bool is_bool() const noexcept; + /** + * Whether this element is a json `null`. + */ + inline bool is_null() const noexcept; + + /** + * Tell whether the value can be cast to provided type (T). 
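Because a JSON number can surface as a signed integer, an unsigned integer, or a double, the predicates above can drive the choice of accessor; a sketch over invented sample values:

```cpp
#include <cstdint>
#include <iostream>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  dom::parser parser;
  dom::array values;
  // One value of each numeric flavour (arbitrary sample data).
  auto error = parser.parse(R"([-1, 18446744073709551615, 3.5])"_padded).get(values);
  if (error) { std::cerr << error << std::endl; return 1; }

  for (dom::element num : values) {
    if (num.is_int64()) {               // fits in a signed 64-bit integer
      int64_t i{};
      if (num.get(i) == SUCCESS) { std::cout << "int64  : " << i << "\n"; }
    } else if (num.is_uint64()) {       // integer too large for int64_t
      uint64_t u{};
      if (num.get(u) == SUCCESS) { std::cout << "uint64 : " << u << "\n"; }
    } else if (num.is_double()) {       // any other JSON number
      double d{};
      if (num.get(d) == SUCCESS) { std::cout << "double : " << d << "\n"; }
    }
  }
  return 0;
}
```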
+ * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + */ + template + simdjson_inline bool is() const noexcept; + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array() or get_string() instead. + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @returns The value cast to the given type, or: + * INCORRECT_TYPE if the value cannot be cast to the given type. + */ + + template + inline simdjson_result get() const noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get the value as the provided type (T). + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the value. May not be set if there is an error. + * + * @returns The error that occurred, or SUCCESS if there was no error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + /** + * Get the value as the provided type (T), setting error if it's not the given type. + * + * Supported types: + * - Boolean: bool + * - Number: double, uint64_t, int64_t + * - String: std::string_view, const char * + * - Array: dom::array + * - Object: dom::object + * + * @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object + * + * @param value The variable to set to the given type. value is undefined if there is an error. + * @param error The variable to store the error. error is set to error_code::SUCCEED if there is an error. + */ + template + inline void tie(T &value, error_code &error) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Read this element as a boolean. + * + * @return The boolean value + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean. + */ + inline operator bool() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Be mindful that JSON allows strings to contain null characters. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. + */ + inline explicit operator const char*() const noexcept(false); + + /** + * Read this element as a null-terminated UTF-8 string. + * + * Does *not* convert other types to a string; requires that the JSON type of the element was + * an actual string. + * + * @return The string value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string. 
+ */ + inline operator std::string_view() const noexcept(false); + + /** + * Read this element as an unsigned integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator uint64_t() const noexcept(false); + /** + * Read this element as an signed integer. + * + * @return The integer value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits + */ + inline operator int64_t() const noexcept(false); + /** + * Read this element as an double. + * + * @return The double value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number + * @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative + */ + inline operator double() const noexcept(false); + /** + * Read this element as a JSON array. + * + * @return The JSON array. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline operator array() const noexcept(false); + /** + * Read this element as a JSON object (key/value pairs). + * + * @return The JSON object. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object + */ + inline operator object() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The beginning of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator begin() const noexcept(false); + + /** + * Iterate over each element in this array. + * + * @return The end of the iteration. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array + */ + inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at_pointer("/foo/a/1") == 20 + * doc.at_pointer("/foo")["a"].at(1) == 20 + * doc.at_pointer("")["foo"]["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + /** + * + * Version 0.4 of simdjson used an incorrect interpretation of the JSON Pointer standard + * and allowed the following : + * + * dom::parser parser; + * element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * doc.at("foo/a/1") == 20 + * + * Though it is intuitive, it is not compliant with RFC 6901 + * https://tools.ietf.org/html/rfc6901 + * + * For standard compliance, use the at_pointer function instead. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + inline simdjson_result at(const std::string_view json_pointer) const noexcept; +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + /** + * Get the value at the given index. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + inline simdjson_result at(size_t index) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * + * Note: The key will be matched against **unescaped** JSON. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + + /** @private for debugging. Prints out the root element. */ + inline bool dump_raw_tape(std::ostream &out) const noexcept; + +private: + simdjson_inline element(const internal::tape_ref &tape) noexcept; + internal::tape_ref tape; + friend class document; + friend class object; + friend class array; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; + +}; + +} // namespace dom + +/** The result of a JSON navigation that may fail. 
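The JSON-pointer and keyed accessors above all return a result object, so they compose with error-code handling; a small sketch (the document and pointers are invented for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  dom::parser parser;
  dom::element doc;
  auto error = parser.parse(R"({"foo":{"a":[10,20,30]}})"_padded).get(doc);
  if (error) { std::cerr << error << std::endl; return 1; }

  int64_t v;
  if (doc.at_pointer("/foo/a/1").get(v) == SUCCESS) {
    std::cout << "/foo/a/1 -> " << v << "\n";                 // 20
  }
  // A missing field is reported as an error code, not an exception.
  error = doc.at_pointer("/foo/missing").get(v);
  if (error == NO_SUCH_FIELD) {
    std::cout << "/foo/missing -> NO_SUCH_FIELD\n";
  }
  // An array index past the end yields INDEX_OUT_OF_BOUNDS.
  error = doc.at_pointer("/foo/a/9").get(v);
  if (error == INDEX_OUT_OF_BOUNDS) {
    std::cout << "/foo/a/9 -> INDEX_OUT_OF_BOUNDS\n";
  }
  return 0;
}
```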
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::element &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result type() const noexcept; + template + simdjson_inline bool is() const noexcept; + template + simdjson_inline simdjson_result get() const noexcept; + template + simdjson_warn_unused simdjson_inline error_code get(T &value) const noexcept; + + simdjson_inline simdjson_result get_array() const noexcept; + simdjson_inline simdjson_result get_object() const noexcept; + simdjson_inline simdjson_result get_c_str() const noexcept; + simdjson_inline simdjson_result get_string_length() const noexcept; + simdjson_inline simdjson_result get_string() const noexcept; + simdjson_inline simdjson_result get_int64() const noexcept; + simdjson_inline simdjson_result get_uint64() const noexcept; + simdjson_inline simdjson_result get_double() const noexcept; + simdjson_inline simdjson_result get_bool() const noexcept; + + simdjson_inline bool is_array() const noexcept; + simdjson_inline bool is_object() const noexcept; + simdjson_inline bool is_string() const noexcept; + simdjson_inline bool is_int64() const noexcept; + simdjson_inline bool is_uint64() const noexcept; + simdjson_inline bool is_double() const noexcept; + simdjson_inline bool is_number() const noexcept; + simdjson_inline bool is_bool() const noexcept; + simdjson_inline bool is_null() const noexcept; + + simdjson_inline simdjson_result operator[](std::string_view key) const noexcept; + simdjson_inline simdjson_result operator[](const char *key) const noexcept; + simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] + simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result at(size_t index) const noexcept; + simdjson_inline simdjson_result at_key(std::string_view key) const noexcept; + simdjson_inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator bool() const noexcept(false); + simdjson_inline explicit operator const char*() const noexcept(false); + simdjson_inline operator std::string_view() const noexcept(false); + simdjson_inline operator uint64_t() const noexcept(false); + simdjson_inline operator int64_t() const noexcept(false); + simdjson_inline operator double() const noexcept(false); + simdjson_inline operator dom::array() const noexcept(false); + simdjson_inline operator dom::object() const noexcept(false); + + simdjson_inline dom::array::iterator begin() const noexcept(false); + simdjson_inline dom::array::iterator end() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_DOCUMENT_H +/* end file simdjson/dom/element.h */ +/* including simdjson/dom/object.h: #include "simdjson/dom/object.h" */ +/* begin file simdjson/dom/object.h */ +#ifndef SIMDJSON_DOM_OBJECT_H +#define SIMDJSON_DOM_OBJECT_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ + +namespace simdjson { +namespace dom { + +/** + * JSON object. 
+ */ +class object { +public: + /** Create a new, invalid object */ + simdjson_inline object() noexcept; + + class iterator { + public: + using value_type = key_value_pair; + using difference_type = std::ptrdiff_t; + + /** + * Get the actual key/value pair + */ + inline const value_type operator*() const noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator& operator++() noexcept; + /** + * Get the next key/value pair. + * + * Part of the std::iterator interface. + * + */ + inline iterator operator++(int) noexcept; + /** + * Check if these values come from the same place in the JSON. + * + * Part of the std::iterator interface. + */ + inline bool operator!=(const iterator& other) const noexcept; + inline bool operator==(const iterator& other) const noexcept; + + inline bool operator<(const iterator& other) const noexcept; + inline bool operator<=(const iterator& other) const noexcept; + inline bool operator>=(const iterator& other) const noexcept; + inline bool operator>(const iterator& other) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline std::string_view key() const noexcept; + /** + * Get the length (in bytes) of the key in this key/value pair. + * You should expect this function to be faster than key().size(). + */ + inline uint32_t key_length() const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view. + */ + inline bool key_equals(std::string_view o) const noexcept; + /** + * Returns true if the key in this key/value pair is equal + * to the provided string_view in a case-insensitive manner. + * Case comparisons may only be handled correctly for ASCII strings. + */ + inline bool key_equals_case_insensitive(std::string_view o) const noexcept; + /** + * Get the key of this key/value pair. + */ + inline const char *key_c_str() const noexcept; + /** + * Get the value of this key/value pair. + */ + inline element value() const noexcept; + + iterator() noexcept = default; + iterator(const iterator&) noexcept = default; + iterator& operator=(const iterator&) noexcept = default; + private: + simdjson_inline iterator(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class object; + }; + + /** + * Return the first key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator begin() const noexcept; + /** + * One past the last key/value pair. + * + * Part of the std::iterable interface. + */ + inline iterator end() const noexcept; + /** + * Get the size of the object (number of keys). + * It is a saturated value with a maximum of 0xFFFFFF: if the value + * is 0xFFFFFF then the size is 0xFFFFFF or greater. + */ + inline size_t size() const noexcept; + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](std::string_view key) const noexcept; + + /** + * Get the value associated with the given key. 
+ * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + * - INCORRECT_TYPE if this is not an object + */ + inline simdjson_result operator[](const char *key) const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * dom::parser parser; + * object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("/foo/a/1") == 20 + * obj.at_pointer("/foo")["a"].at(1) == 20 + * + * It is allowed for a key to be the empty string: + * + * dom::parser parser; + * object obj = parser.parse(R"({ "": { "a": [ 10, 20, 30 ] }})"_padded); + * obj.at_pointer("//a/1") == 20 + * obj.at_pointer("/")["a"].at(1) == 20 + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + + /** + * Get the value associated with the given key. + * + * The key will be matched against **unescaped** JSON: + * + * dom::parser parser; + * int64_t(parser.parse(R"({ "a\n": 1 })"_padded)["a\n"]) == 1 + * parser.parse(R"({ "a\n": 1 })"_padded)["a\\n"].get_uint64().error() == NO_SUCH_FIELD + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key(std::string_view key) const noexcept; + + /** + * Get the value associated with the given key in a case-insensitive manner. + * It is only guaranteed to work over ASCII inputs. + * + * Note: The key will be matched against **unescaped** JSON. + * + * This function has linear-time complexity: the keys are checked one by one. + * + * @return The value associated with this field, or: + * - NO_SUCH_FIELD if the field does not exist in the object + */ + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +private: + simdjson_inline object(const internal::tape_ref &tape) noexcept; + + internal::tape_ref tape; + + friend class element; + friend struct simdjson_result; + template + friend class simdjson::internal::string_builder; +}; + +/** + * Key/value pair in an object. + */ +class key_value_pair { +public: + /** key in the key-value pair **/ + std::string_view key; + /** value in the key-value pair **/ + element value; + +private: + simdjson_inline key_value_pair(std::string_view _key, element _value) noexcept; + friend class object; +}; + +} // namespace dom + +/** The result of a JSON conversion that may fail. 
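A short sketch of walking an object with the iterator interface documented above (the sample document is invented; each value is printed back as JSON via the stream operator):

```cpp
#include <iostream>
#include "simdjson.h"

int main() {
  using namespace simdjson;
  dom::parser parser;
  dom::object obj;
  auto error = parser.parse(R"({"make":"Toyota","model":"Camry","year":2018})"_padded).get(obj);
  if (error) { std::cerr << error << std::endl; return 1; }

  std::cout << obj.size() << " fields\n";
  for (dom::key_value_pair field : obj) {
    std::cout << field.key << " -> " << field.value << "\n";  // value printed as JSON
  }
  return 0;
}
```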
*/ +template<> +struct simdjson_result : public internal::simdjson_result_base { +public: + simdjson_inline simdjson_result() noexcept; ///< @private + simdjson_inline simdjson_result(dom::object value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + inline simdjson_result operator[](std::string_view key) const noexcept; + inline simdjson_result operator[](const char *key) const noexcept; + inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline simdjson_result at_key(std::string_view key) const noexcept; + inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; + +#if SIMDJSON_EXCEPTIONS + inline dom::object::iterator begin() const noexcept(false); + inline dom::object::iterator end() const noexcept(false); + inline size_t size() const noexcept(false); +#endif // SIMDJSON_EXCEPTIONS +}; + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +#include + +namespace std { +namespace ranges { +template<> +inline constexpr bool enable_view = true; +#if SIMDJSON_EXCEPTIONS +template<> +inline constexpr bool enable_view> = true; +#endif // SIMDJSON_EXCEPTIONS +} // namespace ranges +} // namespace std +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_DOM_OBJECT_H +/* end file simdjson/dom/object.h */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* including simdjson/dom/serialization.h: #include "simdjson/dom/serialization.h" */ +/* begin file simdjson/dom/serialization.h */ +#ifndef SIMDJSON_SERIALIZATION_H +#define SIMDJSON_SERIALIZATION_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ + +#include + +namespace simdjson { + +/** + * The string_builder template and mini_formatter class + * are not part of our public API and are subject to change + * at any time! + */ +namespace internal { + +template +class base_formatter { +public: + /** Add a comma **/ + simdjson_inline void comma(); + /** Start an array, prints [ **/ + simdjson_inline void start_array(); + /** End an array, prints ] **/ + simdjson_inline void end_array(); + /** Start an array, prints { **/ + simdjson_inline void start_object(); + /** Start an array, prints } **/ + simdjson_inline void end_object(); + /** Prints a true **/ + simdjson_inline void true_atom(); + /** Prints a false **/ + simdjson_inline void false_atom(); + /** Prints a null **/ + simdjson_inline void null_atom(); + /** Prints a number **/ + simdjson_inline void number(int64_t x); + /** Prints a number **/ + simdjson_inline void number(uint64_t x); + /** Prints a number **/ + simdjson_inline void number(double x); + /** Prints a key (string + colon) **/ + simdjson_inline void key(std::string_view unescaped); + /** Prints a string. The string is escaped as needed. **/ + simdjson_inline void string(std::string_view unescaped); + /** Clears out the content. **/ + simdjson_inline void clear(); + /** + * Get access to the buffer, it is owned by the instance, but + * the user can make a copy. 
+ **/ + simdjson_inline std::string_view str() const; + + /** Prints one character **/ + simdjson_inline void one_char(char c); + + simdjson_inline void call_print_newline() { + this->print_newline(); + } + + simdjson_inline void call_print_indents(size_t depth) { + this->print_indents(depth); + } + + simdjson_inline void call_print_space() { + this->print_space(); + } + +protected: + // implementation details (subject to change) + /** Backing buffer **/ + std::vector buffer{}; // not ideal! +}; + + +/** + * @private This is the class that we expect to use with the string_builder + * template. It tries to produce a compact version of the JSON element + * as quickly as possible. + */ +class mini_formatter : public base_formatter { +public: + simdjson_inline void print_newline(); + + simdjson_inline void print_indents(size_t depth); + + simdjson_inline void print_space(); +}; + +class pretty_formatter : public base_formatter { +public: + simdjson_inline void print_newline(); + + simdjson_inline void print_indents(size_t depth); + + simdjson_inline void print_space(); + +protected: + int indent_step = 4; +}; + +/** + * @private The string_builder template allows us to construct + * a string from a document element. It is parametrized + * by a "formatter" which handles the details. Thus + * the string_builder template could support both minification + * and prettification, and various other tradeoffs. + */ +template +class string_builder { +public: + /** Construct an initially empty builder, would print the empty string **/ + string_builder() = default; + /** Append an element to the builder (to be printed) **/ + inline void append(simdjson::dom::element value); + /** Append an array to the builder (to be printed) **/ + inline void append(simdjson::dom::array value); + /** Append an object to the builder (to be printed) **/ + inline void append(simdjson::dom::object value); + /** Reset the builder (so that it would print the empty string) **/ + simdjson_inline void clear(); + /** + * Get access to the string. The string_view is owned by the builder + * and it is invalid to use it after the string_builder has been + * destroyed. + * However you can make a copy of the string_view on memory that you + * own. + */ + simdjson_inline std::string_view str() const; + /** Append a key_value_pair to the builder (to be printed) **/ + simdjson_inline void append(simdjson::dom::key_value_pair value); +private: + formatter format{}; +}; + +} // internal + +namespace dom { + +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +} // namespace dom + +/** + * Converts JSON to a string. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << to_string(doc) << endl; // prints [1,2,3] + * + */ +template +std::string to_string(T x) { + // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ + // Currently minify and to_string are identical but in the future, they may + // differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} +#if SIMDJSON_EXCEPTIONS +template +std::string to_string(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Minifies a JSON element or document, printing the smallest possible valid JSON. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * cout << minify(doc) << endl; // prints [1,2,3] + * + */ +template +std::string minify(T x) { + return to_string(x); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string minify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +/** + * Prettifies a JSON element or document, printing the valid JSON with indentation. + * + * dom::parser parser; + * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); + * + * // Prints: + * // { + * // [ + * // 1, + * // 2, + * // 3 + * // ] + * // } + * cout << prettify(doc) << endl; + * + */ +template +std::string prettify(T x) { + simdjson::internal::string_builder sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); +} + +#if SIMDJSON_EXCEPTIONS +template +std::string prettify(simdjson_result x) { + if (x.error()) { throw simdjson_error(x.error()); } + return to_string(x.value()); +} +#endif + +} // namespace simdjson + + +#endif +/* end file simdjson/dom/serialization.h */ + +// Deprecated API +/* including simdjson/dom/jsonparser.h: #include "simdjson/dom/jsonparser.h" */ +/* begin file simdjson/dom/jsonparser.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_JSONPARSER_H +#define SIMDJSON_DOM_JSONPARSER_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ + +/* including simdjson/dom/parser-inl.h: #include "simdjson/dom/parser-inl.h" */ +/* begin file simdjson/dom/parser-inl.h */ +#ifndef SIMDJSON_PARSER_INL_H +#define SIMDJSON_PARSER_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document_stream.h" */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/padded_string-inl.h" */ +/* including simdjson/dom/document_stream-inl.h: #include "simdjson/dom/document_stream-inl.h" */ +/* begin file simdjson/dom/document_stream-inl.h */ +#ifndef SIMDJSON_DOCUMENT_STREAM_INL_H +#define SIMDJSON_DOCUMENT_STREAM_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include 
"simdjson/dom/document_stream.h" */ +/* including simdjson/dom/element-inl.h: #include "simdjson/dom/element-inl.h" */ +/* begin file simdjson/dom/element-inl.h */ +#ifndef SIMDJSON_ELEMENT_INL_H +#define SIMDJSON_ELEMENT_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ +/* including simdjson/internal/tape_type.h: #include "simdjson/internal/tape_type.h" */ +/* begin file simdjson/internal/tape_type.h */ +#ifndef SIMDJSON_INTERNAL_TAPE_TYPE_H +#define SIMDJSON_INTERNAL_TAPE_TYPE_H + +namespace simdjson { +namespace internal { + +/** + * The possible types in the tape. + */ +enum class tape_type { + ROOT = 'r', + START_ARRAY = '[', + START_OBJECT = '{', + END_ARRAY = ']', + END_OBJECT = '}', + STRING = '"', + INT64 = 'l', + UINT64 = 'u', + DOUBLE = 'd', + TRUE_VALUE = 't', + FALSE_VALUE = 'f', + NULL_VALUE = 'n' +}; // enum class tape_type + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_TAPE_TYPE_H +/* end file simdjson/internal/tape_type.h */ + +/* including simdjson/dom/object-inl.h: #include "simdjson/dom/object-inl.h" */ +/* begin file simdjson/dom/object-inl.h */ +#ifndef SIMDJSON_OBJECT_INL_H +#define SIMDJSON_OBJECT_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/object.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ + +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::object value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +inline dom::object::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::object::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + +// +// object inline implementation +// +simdjson_inline object::object() noexcept : 
tape{} {} +simdjson_inline object::object(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline object::iterator object::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline object::iterator object::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t object::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.scope_count(); +} + +inline simdjson_result object::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result object::operator[](const char *key) const noexcept { + return at_key(key); +} +inline simdjson_result object::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = at_key(unescaped); + } else { + child = at_key(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +inline simdjson_result object::at_key(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} +// In case you wonder why we need this, please see +// https://github.com/simdjson/simdjson/issues/323 +// People do seek keys in a case-insensitive manner. 
+inline simdjson_result object::at_key_case_insensitive(std::string_view key) const noexcept { + iterator end_field = end(); + for (iterator field = begin(); field != end_field; ++field) { + if (field.key_equals_case_insensitive(key)) { + return field.value(); + } + } + return NO_SUCH_FIELD; +} + +// +// object::iterator inline implementation +// +simdjson_inline object::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline const key_value_pair object::iterator::operator*() const noexcept { + return key_value_pair(key(), value()); +} +inline bool object::iterator::operator!=(const object::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool object::iterator::operator==(const object::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool object::iterator::operator<(const object::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool object::iterator::operator<=(const object::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool object::iterator::operator>=(const object::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool object::iterator::operator>(const object::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} +inline object::iterator& object::iterator::operator++() noexcept { + tape.json_index++; + tape.json_index = tape.after_element(); + return *this; +} +inline object::iterator object::iterator::operator++(int) noexcept { + object::iterator out = *this; + ++*this; + return out; +} +inline std::string_view object::iterator::key() const noexcept { + return tape.get_string_view(); +} +inline uint32_t object::iterator::key_length() const noexcept { + return tape.get_string_length(); +} +inline const char* object::iterator::key_c_str() const noexcept { + return reinterpret_cast(&tape.doc->string_buf[size_t(tape.tape_value()) + sizeof(uint32_t)]); +} +inline element object::iterator::value() const noexcept { + return element(internal::tape_ref(tape.doc, tape.json_index + 1)); +} + +/** + * Design notes: + * Instead of constructing a string_view and then comparing it with a + * user-provided strings, it is probably more performant to have dedicated + * functions taking as a parameter the string we want to compare against + * and return true when they are equal. That avoids the creation of a temporary + * std::string_view. Though it is possible for the compiler to avoid entirely + * any overhead due to string_view, relying too much on compiler magic is + * problematic: compiler magic sometimes fail, and then what do you do? + * Also, enticing users to rely on high-performance function is probably better + * on the long run. + */ + +inline bool object::iterator::key_equals(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. + const uint32_t len = key_length(); + if(o.size() == len) { + // We avoid construction of a temporary string_view instance. + return (memcmp(o.data(), key_c_str(), len) == 0); + } + return false; +} + +inline bool object::iterator::key_equals_case_insensitive(std::string_view o) const noexcept { + // We use the fact that the key length can be computed quickly + // without access to the string buffer. 
+ const uint32_t len = key_length(); + if(o.size() == len) { + // See For case-insensitive string comparisons, avoid char-by-char functions + // https://lemire.me/blog/2020/04/30/for-case-insensitive-string-comparisons-avoid-char-by-char-functions/ + // Note that it might be worth rolling our own strncasecmp function, with vectorization. + return (simdjson_strncasecmp(o.data(), key_c_str(), len) == 0); + } + return false; +} +// +// key_value_pair inline implementation +// +inline key_value_pair::key_value_pair(std::string_view _key, element _value) noexcept : + key(_key), value(_value) {} + +} // namespace dom + +} // namespace simdjson + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_OBJECT_INL_H +/* end file simdjson/dom/object-inl.h */ +/* skipped duplicate #include "simdjson/error-inl.h" */ + +#include +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::element &&value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} +inline simdjson_result simdjson_result::type() const noexcept { + if (error()) { return error(); } + return first.type(); +} + +template +simdjson_inline bool simdjson_result::is() const noexcept { + return !error() && first.is(); +} +template +simdjson_inline simdjson_result simdjson_result::get() const noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) const noexcept { + if (error()) { return error(); } + return first.get(value); +} + +simdjson_inline simdjson_result simdjson_result::get_array() const noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() const noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_c_str() const noexcept { + if (error()) { return error(); } + return first.get_c_str(); +} +simdjson_inline simdjson_result simdjson_result::get_string_length() const noexcept { + if (error()) { return error(); } + return first.get_string_length(); +} +simdjson_inline simdjson_result simdjson_result::get_string() const noexcept { + if (error()) { return error(); } + return first.get_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() const noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() const noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_double() const noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() const noexcept { + if (error()) { return error(); } + return first.get_bool(); +} + +simdjson_inline bool simdjson_result::is_array() const noexcept { + return !error() && first.is_array(); +} +simdjson_inline bool 
simdjson_result::is_object() const noexcept { + return !error() && first.is_object(); +} +simdjson_inline bool simdjson_result::is_string() const noexcept { + return !error() && first.is_string(); +} +simdjson_inline bool simdjson_result::is_int64() const noexcept { + return !error() && first.is_int64(); +} +simdjson_inline bool simdjson_result::is_uint64() const noexcept { + return !error() && first.is_uint64(); +} +simdjson_inline bool simdjson_result::is_double() const noexcept { + return !error() && first.is_double(); +} +simdjson_inline bool simdjson_result::is_number() const noexcept { + return !error() && first.is_number(); +} +simdjson_inline bool simdjson_result::is_bool() const noexcept { + return !error() && first.is_bool(); +} + +simdjson_inline bool simdjson_result::is_null() const noexcept { + return !error() && first.is_null(); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) const noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::at_pointer(const std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + if (error()) { return error(); } + return first.at(json_pointer); +SIMDJSON_POP_DISABLE_WARNINGS +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key(key); +} +simdjson_inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { + if (error()) { return error(); } + return first.at_key_case_insensitive(key); +} + +#if SIMDJSON_EXCEPTIONS + +simdjson_inline simdjson_result::operator bool() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator const char *() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator std::string_view() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator uint64_t() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator int64_t() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator double() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::array() const noexcept(false) { + return get(); +} +simdjson_inline simdjson_result::operator dom::object() const noexcept(false) { + return get(); +} + +simdjson_inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +namespace dom { + 
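+// Illustrative usage sketch (comments only; not part of the simdjson API, and the
+// JSON input and field names below are hypothetical): the dom::element accessors
+// implemented in this section are normally consumed through simdjson_result
+// values, either by checking error codes or, when SIMDJSON_EXCEPTIONS is enabled,
+// by relying on the implicit conversions that throw simdjson_error on failure.
+//
+//   dom::parser parser;
+//   dom::element doc;
+//   auto error = parser.parse(R"({ "id": 42, "name": "x" })"_padded).get(doc);
+//   if (error) { std::cerr << error << std::endl; }
+//   int64_t id;
+//   if (doc["id"].get(id) == simdjson::SUCCESS) { /* use id */ }
+//   std::string_view name = doc["name"]; // throws simdjson_error if exceptions are enabled and the lookup fails
+//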
+// +// element inline implementation +// +simdjson_inline element::element() noexcept : tape{} {} +simdjson_inline element::element(const internal::tape_ref &_tape) noexcept : tape{_tape} { } + +inline element_type element::type() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + auto tape_type = tape.tape_ref_type(); + return tape_type == internal::tape_type::FALSE_VALUE ? element_type::BOOL : static_cast(tape_type); +} + +inline simdjson_result element::get_bool() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(tape.is_true()) { + return true; + } else if(tape.is_false()) { + return false; + } + return INCORRECT_TYPE; +} +inline simdjson_result element::get_c_str() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_c_str(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string_length() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: { + return tape.get_string_length(); + } + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_string() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::STRING: + return tape.get_string_view(); + default: + return INCORRECT_TYPE; + } +} +inline simdjson_result element::get_uint64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(simdjson_unlikely(!tape.is_uint64())) { // branch rarely taken + if(tape.is_int64()) { + int64_t result = tape.next_tape_value(); + if (result < 0) { + return NUMBER_OUT_OF_RANGE; + } + return uint64_t(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_int64() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(simdjson_unlikely(!tape.is_int64())) { // branch rarely taken + if(tape.is_uint64()) { + uint64_t result = tape.next_tape_value(); + // Wrapping max in parens to handle Windows issue: https://stackoverflow.com/questions/11544073/how-do-i-deal-with-the-max-macro-in-windows-h-colliding-with-max-in-std + if (result > uint64_t((std::numeric_limits::max)())) { + return NUMBER_OUT_OF_RANGE; + } + return static_cast(result); + } + return INCORRECT_TYPE; + } + return tape.next_tape_value(); +} +inline simdjson_result element::get_double() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + // Performance considerations: + // 1. Querying tape_ref_type() implies doing a shift, it is fast to just do a straight + // comparison. + // 2. Using a switch-case relies on the compiler guessing what kind of code generation + // we want... But the compiler cannot know that we expect the type to be "double" + // most of the time. + // We can expect get to refer to a double type almost all the time. + // It is important to craft the code accordingly so that the compiler can use this + // information. (This could also be solved with profile-guided optimization.) 
+  if(simdjson_unlikely(!tape.is_double())) { // branch rarely taken
+    if(tape.is_uint64()) {
+      return double(tape.next_tape_value<uint64_t>());
+    } else if(tape.is_int64()) {
+      return double(tape.next_tape_value<int64_t>());
+    }
+    return INCORRECT_TYPE;
+  }
+  // this is common:
+  return tape.next_tape_value<double>();
+}
+inline simdjson_result<array> element::get_array() const noexcept {
+  SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914
+  switch (tape.tape_ref_type()) {
+    case internal::tape_type::START_ARRAY:
+      return array(tape);
+    default:
+      return INCORRECT_TYPE;
+  }
+}
+inline simdjson_result<object> element::get_object() const noexcept {
+  SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914
+  switch (tape.tape_ref_type()) {
+    case internal::tape_type::START_OBJECT:
+      return object(tape);
+    default:
+      return INCORRECT_TYPE;
+  }
+}
+
+template<typename T>
+simdjson_warn_unused simdjson_inline error_code element::get(T &value) const noexcept {
+  return get<T>().get(value);
+}
+// An element-specific version prevents recursion with simdjson_result::get<element>(value)
+template<>
+simdjson_warn_unused simdjson_inline error_code element::get<element>(element &value) const noexcept {
+  value = element(tape);
+  return SUCCESS;
+}
+template<typename T>
+inline void element::tie(T &value, error_code &error) && noexcept {
+  error = get<T>(value);
+}
+
+template<typename T>
+simdjson_inline bool element::is() const noexcept {
+  auto result = get<T>();
+  return !result.error();
+}
+
+template<> inline simdjson_result<array> element::get<array>() const noexcept { return get_array(); }
+template<> inline simdjson_result<object> element::get<object>() const noexcept { return get_object(); }
+template<> inline simdjson_result<const char *> element::get<const char *>() const noexcept { return get_c_str(); }
+template<> inline simdjson_result<std::string_view> element::get<std::string_view>() const noexcept { return get_string(); }
+template<> inline simdjson_result<int64_t> element::get<int64_t>() const noexcept { return get_int64(); }
+template<> inline simdjson_result<uint64_t> element::get<uint64_t>() const noexcept { return get_uint64(); }
+template<> inline simdjson_result<double> element::get<double>() const noexcept { return get_double(); }
+template<> inline simdjson_result<bool> element::get<bool>() const noexcept { return get_bool(); }
+
+inline bool element::is_array() const noexcept { return is<array>(); }
+inline bool element::is_object() const noexcept { return is<object>(); }
+inline bool element::is_string() const noexcept { return is<std::string_view>(); }
+inline bool element::is_int64() const noexcept { return is<int64_t>(); }
+inline bool element::is_uint64() const noexcept { return is<uint64_t>(); }
+inline bool element::is_double() const noexcept { return is<double>(); }
+inline bool element::is_bool() const noexcept { return is<bool>(); }
+inline bool element::is_number() const noexcept { return is_int64() || is_uint64() || is_double(); }
+
+inline bool element::is_null() const noexcept {
+  return tape.is_null_on_tape();
+}
+
+#if SIMDJSON_EXCEPTIONS
+
+inline element::operator bool() const noexcept(false) { return get<bool>(); }
+inline element::operator const char*() const noexcept(false) { return get<const char *>(); }
+inline element::operator std::string_view() const noexcept(false) { return get<std::string_view>(); }
+inline element::operator uint64_t() const noexcept(false) { return get<uint64_t>(); }
+inline element::operator int64_t() const noexcept(false) { return get<int64_t>(); }
+inline element::operator double() const noexcept(false) { return get<double>(); }
+inline element::operator array() const noexcept(false) { return get<array>(); }
+inline element::operator object() const noexcept(false) { return get<object>(); }
+
+inline array::iterator element::begin() const
noexcept(false) { + return get().begin(); +} +inline array::iterator element::end() const noexcept(false) { + return get().end(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result element::operator[](std::string_view key) const noexcept { + return at_key(key); +} +inline simdjson_result element::operator[](const char *key) const noexcept { + return at_key(key); +} + +inline simdjson_result element::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_pointer(json_pointer); + case internal::tape_type::START_ARRAY: + return array(tape).at_pointer(json_pointer); + default: { + if(!json_pointer.empty()) { // a non-empty string is invalid on an atom + return INVALID_JSON_POINTER; + } + // an empty string means that we return the current node + dom::element copy(*this); + return simdjson_result(std::move(copy)); + } + } +} +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] +inline simdjson_result element::at(std::string_view json_pointer) const noexcept { + // version 0.4 of simdjson allowed non-compliant pointers + auto std_pointer = (json_pointer.empty() ? "" : "/") + std::string(json_pointer.begin(), json_pointer.end()); + return at_pointer(std_pointer); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline simdjson_result element::at(size_t index) const noexcept { + return get().at(index); +} +inline simdjson_result element::at_key(std::string_view key) const noexcept { + return get().at_key(key); +} +inline simdjson_result element::at_key_case_insensitive(std::string_view key) const noexcept { + return get().at_key_case_insensitive(key); +} + +inline bool element::dump_raw_tape(std::ostream &out) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.doc->dump_raw_tape(out); +} + + +inline std::ostream& operator<<(std::ostream& out, element_type type) { + switch (type) { + case element_type::ARRAY: + return out << "array"; + case element_type::OBJECT: + return out << "object"; + case element_type::INT64: + return out << "int64_t"; + case element_type::UINT64: + return out << "uint64_t"; + case element_type::DOUBLE: + return out << "double"; + case element_type::STRING: + return out << "string"; + case element_type::BOOL: + return out << "bool"; + case element_type::NULL_VALUE: + return out << "null"; + default: + return out << "unexpected content!!!"; // abort() usage is forbidden in the library + } +} + +} // namespace dom + +} // namespace simdjson + +#endif // SIMDJSON_ELEMENT_INL_H +/* end file simdjson/dom/element-inl.h */ +/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +namespace simdjson { +namespace dom { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, dom::parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} +#endif + +simdjson_inline document_stream::document_stream( + dom::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? MINIMAL_BATCH_SIZE : _batch_size}, + error{SUCCESS} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change +#endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + error{UNINITIALIZED} +#ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) +#endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept { +#ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); +#endif +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline document_stream::iterator::reference document_stream::iterator::operator*() noexcept { + // Note that in case of error, we do not yet mark + // the iterator as "finished": this detection is done + // in the operator++ function since it is possible + // to call operator++ repeatedly while omitting + // calls to operator*. + if (stream->error) { return stream->error; } + return stream->parser->doc.root(); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. 
+ return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->ensure_capacity(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } +#ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread if needed + error = stage1_thread_parser.ensure_capacity(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } +#endif // SIMDJSON_THREADS_ENABLED + next(); +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + const char* start = reinterpret_cast(stream->buf) + current_index(); + bool object_or_array = ((*start == '[') || (*start == '{')); + if(object_or_array) { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index - 1]; + return std::string_view(start, next_doc_index - current_index() + 1); + } else { + size_t next_doc_index = stream->batch_start + stream->parser->implementation->structural_indexes[stream->parser->implementation->next_structural_index]; + return std::string_view(reinterpret_cast(stream->buf) + current_index(), next_doc_index - current_index() - 1); + } +} + + +inline void document_stream::next() noexcept { + // We always exit at once, once in an error condition. + if (error) { return; } + + // Load the next document from the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + // If that was the last document in the batch, load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + +#ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } +#else + error = run_stage1(*parser, batch_start); +#endif + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
+ // Run stage 2 on the first document in the batch + doc_index = batch_start + parser->implementation->structural_indexes[parser->implementation->next_structural_index]; + error = parser->implementation->stage2_next(parser->doc); + } +} +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(dom::parser &p, size_t _batch_start) noexcept { + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(*parser, stage1_thread_parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! + if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. 
+ this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace dom + +simdjson_inline simdjson_result::simdjson_result() noexcept + : simdjson_result_base() { +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : simdjson_result_base(error) { +} +simdjson_inline simdjson_result::simdjson_result(dom::document_stream &&value) noexcept + : simdjson_result_base(std::forward(value)) { +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +#else // SIMDJSON_EXCEPTIONS +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_inline dom::document_stream::iterator simdjson_result::begin() noexcept { + first.error = error(); + return first.begin(); +} +simdjson_inline dom::document_stream::iterator simdjson_result::end() noexcept { + first.error = error(); + return first.end(); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API +#endif // SIMDJSON_EXCEPTIONS + +} // namespace simdjson +#endif // SIMDJSON_DOCUMENT_STREAM_INL_H +/* end file simdjson/dom/document_stream-inl.h */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ + +#include + +namespace simdjson { +namespace dom { + +// +// parser inline implementation +// +simdjson_inline parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity}, + loaded_bytes(nullptr) { +} +simdjson_inline parser::parser(parser &&other) noexcept = default; +simdjson_inline parser &parser::operator=(parser &&other) noexcept = default; + +inline bool parser::is_valid() const noexcept { return valid; } +inline int parser::get_error_code() const noexcept { return error; } +inline std::string parser::get_error_message() const noexcept { return error_message(error); } + +inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { + return valid ? 
doc.dump_raw_tape(os) : false; +} + +inline simdjson_result parser::read_file(const std::string &path) noexcept { + // Open the file + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + std::FILE *fp = std::fopen(path.c_str(), "rb"); + SIMDJSON_POP_DISABLE_WARNINGS + + if (fp == nullptr) { + return IO_ERROR; + } + + // Get the file size + int ret; +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + ret = _fseeki64(fp, 0, SEEK_END); +#else + ret = std::fseek(fp, 0, SEEK_END); +#endif // _WIN64 + if(ret < 0) { + std::fclose(fp); + return IO_ERROR; + } +#if SIMDJSON_VISUAL_STUDIO && !SIMDJSON_IS_32BITS + __int64 len = _ftelli64(fp); + if(len == -1L) { + std::fclose(fp); + return IO_ERROR; + } +#else + long len = std::ftell(fp); + if((len < 0) || (len == LONG_MAX)) { + std::fclose(fp); + return IO_ERROR; + } +#endif + + // Make sure we have enough capacity to load the file + if (_loaded_bytes_capacity < size_t(len)) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + std::fclose(fp); + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + + // Read the string + std::rewind(fp); + size_t bytes_read = std::fread(loaded_bytes.get(), 1, len, fp); + if (std::fclose(fp) != 0 || bytes_read != size_t(len)) { + return IO_ERROR; + } + + return bytes_read; +} + +inline simdjson_result parser::load(const std::string &path) & noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + return parse(loaded_bytes.get(), len, false); +} + +inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { + size_t len; + auto _error = read_file(path).get(len); + if (_error) { return _error; } + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, reinterpret_cast(loaded_bytes.get()), len, batch_size); +} + +inline simdjson_result parser::parse_into_document(document& provided_doc, const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + // Important: we need to ensure that document has enough capacity. + // Important: It is possible that provided_doc is actually the internal 'doc' within the parser!!! + error_code _error = ensure_capacity(provided_doc, len); + if (_error) { return _error; } + if (realloc_if_needed) { + // Make sure we have enough capacity to copy len bytes + if (!loaded_bytes || _loaded_bytes_capacity < len) { + loaded_bytes.reset( internal::allocate_padded_buffer(len) ); + if (!loaded_bytes) { + return MEMALLOC; + } + _loaded_bytes_capacity = len; + } + std::memcpy(static_cast(loaded_bytes.get()), buf, len); + } + _error = implementation->parse(realloc_if_needed ? 
reinterpret_cast(loaded_bytes.get()): buf, len, provided_doc); + + if (_error) { return _error; } + + return provided_doc.root(); +} + +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(provided_doc, reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const std::string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse_into_document(document& provided_doc, const padded_string &s) & noexcept { + return parse_into_document(provided_doc, s.data(), s.length(), false); +} + + +inline simdjson_result parser::parse(const uint8_t *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse_into_document(doc, buf, len, realloc_if_needed); +} + +simdjson_inline simdjson_result parser::parse(const char *buf, size_t len, bool realloc_if_needed) & noexcept { + return parse(reinterpret_cast(buf), len, realloc_if_needed); +} +simdjson_inline simdjson_result parser::parse(const std::string &s) & noexcept { + return parse(s.data(), s.length(), s.capacity() - s.length() < SIMDJSON_PADDING); +} +simdjson_inline simdjson_result parser::parse(const padded_string &s) & noexcept { + return parse(s.data(), s.length(), false); +} +simdjson_inline simdjson_result parser::parse(const padded_string_view &v) & noexcept { + return parse(v.data(), v.length(), false); +} + +inline simdjson_result parser::parse_many(const uint8_t *buf, size_t len, size_t batch_size) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + return document_stream(*this, buf, len, batch_size); +} +inline simdjson_result parser::parse_many(const char *buf, size_t len, size_t batch_size) noexcept { + return parse_many(reinterpret_cast(buf), len, batch_size); +} +inline simdjson_result parser::parse_many(const std::string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} +inline simdjson_result parser::parse_many(const padded_string &s, size_t batch_size) noexcept { + return parse_many(s.data(), s.length(), batch_size); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return implementation ? implementation->capacity() : 0; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return implementation ? 
implementation->max_depth() : DEFAULT_MAX_DEPTH; +} + +simdjson_warn_unused +inline error_code parser::allocate(size_t capacity, size_t max_depth) noexcept { + // + // Reallocate implementation if needed + // + error_code err; + if (implementation) { + err = implementation->allocate(capacity, max_depth); + } else { + err = simdjson::get_active_implementation()->create_dom_parser_implementation(capacity, max_depth, implementation); + } + if (err) { return err; } + return SUCCESS; +} + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +simdjson_warn_unused +inline bool parser::allocate_capacity(size_t capacity, size_t max_depth) noexcept { + return !allocate(capacity, max_depth); +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +inline error_code parser::ensure_capacity(size_t desired_capacity) noexcept { + return ensure_capacity(doc, desired_capacity); +} + + +inline error_code parser::ensure_capacity(document& target_document, size_t desired_capacity) noexcept { + // 1. It is wasteful to allocate a document and a parser for documents spanning less than MINIMAL_DOCUMENT_CAPACITY bytes. + // 2. If we allow desired_capacity = 0 then it is possible to exit this function with implementation == nullptr. + if(desired_capacity < MINIMAL_DOCUMENT_CAPACITY) { desired_capacity = MINIMAL_DOCUMENT_CAPACITY; } + // If we don't have enough capacity, (try to) automatically bump it. + // If the document needs allocation, do it too. + // Both in one if statement to minimize unlikely branching. + // + // Note: we must make sure that this function is called if capacity() == 0. We do so because we + // ensure that desired_capacity > 0. + if (simdjson_unlikely(capacity() < desired_capacity || target_document.capacity() < desired_capacity)) { + if (desired_capacity > max_capacity()) { + return error = CAPACITY; + } + error_code err1 = target_document.capacity() < desired_capacity ? target_document.allocate(desired_capacity) : SUCCESS; + error_code err2 = capacity() < desired_capacity ? allocate(desired_capacity, max_depth()) : SUCCESS; + if(err1 != SUCCESS) { return error = err1; } + if(err2 != SUCCESS) { return error = err2; } + } + return SUCCESS; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity > MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = MINIMAL_DOCUMENT_CAPACITY; + } +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_PARSER_INL_H +/* end file simdjson/dom/parser-inl.h */ + +namespace simdjson { + +// +// C API (json_parse and build_parsed_json) declarations +// + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const uint8_t *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const char *buf, size_t len, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const std::string &s, dom::parser &parser, bool realloc_if_needed = true) noexcept { + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} +[[deprecated("Use parser.parse() instead")]] +inline int json_parse(const padded_string &s, dom::parser &parser) noexcept { + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return code; +} + +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const uint8_t *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const char *buf, size_t len, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(buf, len, realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. 
+ parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const std::string &s, bool realloc_if_needed = true) noexcept { + dom::parser parser; + error_code code = parser.parse(s.data(), s.length(), realloc_if_needed).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +[[deprecated("Use parser.parse() instead")]] +simdjson_warn_unused inline dom::parser build_parsed_json(const padded_string &s) noexcept { + dom::parser parser; + error_code code = parser.parse(s).error(); + // The deprecated json_parse API is a signal that the user plans to *use* the error code / valid + // bits in the parser instead of heeding the result code. The normal parser unsets those in + // anticipation of making the error code ephemeral. + // Here we put the code back into the parser, until we've removed this method. + parser.valid = code == SUCCESS; + parser.error = code; + return parser; +} +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +/** @private We do not want to allow implicit conversion from C string to std::string. */ +int json_parse(const char *buf, dom::parser &parser) noexcept = delete; +/** @private We do not want to allow implicit conversion from C string to std::string. */ +dom::parser build_parsed_json(const char *buf) noexcept = delete; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_JSONPARSER_H +/* end file simdjson/dom/jsonparser.h */ +/* including simdjson/dom/parsedjson.h: #include "simdjson/dom/parsedjson.h" */ +/* begin file simdjson/dom/parsedjson.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_H +#define SIMDJSON_DOM_PARSEDJSON_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ + +namespace simdjson { + +/** + * @deprecated Use `dom::parser` instead. 
+ */ +using ParsedJson [[deprecated("Use dom::parser instead")]] = dom::parser; + +} // namespace simdjson + +#endif // SIMDJSON_DOM_PARSEDJSON_H +/* end file simdjson/dom/parsedjson.h */ +/* including simdjson/dom/parsedjson_iterator.h: #include "simdjson/dom/parsedjson_iterator.h" */ +/* begin file simdjson/dom/parsedjson_iterator.h */ +// TODO Remove this -- deprecated API and files + +#ifndef SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +#define SIMDJSON_DOM_PARSEDJSON_ITERATOR_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { +/** @private **/ +class [[deprecated("Use the new DOM navigation API instead (see doc/basics.md)")]] dom::parser::Iterator { +public: + inline Iterator(const dom::parser &parser) noexcept(false); + inline Iterator(const Iterator &o) noexcept; + inline ~Iterator() noexcept; + + inline Iterator& operator=(const Iterator&) = delete; + + inline bool is_ok() const; + + // useful for debugging purposes + inline size_t get_tape_location() const; + + // useful for debugging purposes + inline size_t get_tape_length() const; + + // returns the current depth (start at 1 with 0 reserved for the fictitious + // root node) + inline size_t get_depth() const; + + // A scope is a series of nodes at the same depth, typically it is either an + // object ({) or an array ([). The root node has type 'r'. + inline uint8_t get_scope_type() const; + + // move forward in document order + inline bool move_forward(); + + // retrieve the character code of what we're looking at: + // [{"slutfn are the possibilities + inline uint8_t get_type() const { + return current_type; // short functions should be inlined! + } + + // get the int64_t value at this node; valid only if get_type is "l" + inline int64_t get_integer() const; + + // get the value as uint64; valid only if if get_type is "u" + inline uint64_t get_unsigned_integer() const; + + // get the string value at this node (NULL ended); valid only if get_type is " + // note that tabs, and line endings are escaped in the returned value (see + // print_with_escapes) return value is valid UTF-8, it may contain NULL chars + // within the string: get_string_length determines the true string length. + inline const char *get_string() const; + + // return the length of the string in bytes + inline uint32_t get_string_length() const; + + // get the double value at this node; valid only if + // get_type() is "d" + inline double get_double() const; + + inline bool is_object_or_array() const { return is_object() || is_array(); } + + inline bool is_object() const { return get_type() == '{'; } + + inline bool is_array() const { return get_type() == '['; } + + inline bool is_string() const { return get_type() == '"'; } + + // Returns true if the current type of the node is an signed integer. + // You can get its value with `get_integer()`. + inline bool is_integer() const { return get_type() == 'l'; } + + // Returns true if the current type of the node is an unsigned integer. + // You can get its value with `get_unsigned_integer()`. + // + // NOTE: + // Only a large value, which is out of range of a 64-bit signed integer, is + // represented internally as an unsigned node. On the other hand, a typical + // positive integer, such as 1, 42, or 1000000, is as a signed node. + // Be aware this function returns false for a signed node. 
+ inline bool is_unsigned_integer() const { return get_type() == 'u'; } + // Returns true if the current type of the node is a double floating-point number. + inline bool is_double() const { return get_type() == 'd'; } + // Returns true if the current type of the node is a number (integer or floating-point). + inline bool is_number() const { + return is_integer() || is_unsigned_integer() || is_double(); + } + // Returns true if the current type of the node is a bool with true value. + inline bool is_true() const { return get_type() == 't'; } + // Returns true if the current type of the node is a bool with false value. + inline bool is_false() const { return get_type() == 'f'; } + // Returns true if the current type of the node is null. + inline bool is_null() const { return get_type() == 'n'; } + // Returns true if the type byte represents an object of an array + static bool is_object_or_array(uint8_t type) { + return ((type == '[') || (type == '{')); + } + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // We seek the key using C's strcmp so if your JSON strings contain + // NULL chars, this would trigger a false positive: if you expect that + // to be the case, take extra precautions. + // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key); + + // as above, but case insensitive lookup (strcmpi instead of strcmp) + inline bool move_to_key_insensitive(const char *key); + + // when at {, go one level deep, looking for a given key + // if successful, we are left pointing at the value, + // if not, we are still pointing at the object ({) + // (in case of repeated keys, this only finds the first one). + // The string we search for can contain NULL values. + // Furthermore, we do the comparison character-by-character + // without taking into account Unicode equivalence. + inline bool move_to_key(const char *key, uint32_t length); + + // when at a key location within an object, this moves to the accompanying + // value (located next to it). This is equivalent but much faster than + // calling "next()". + inline void move_to_value(); + + // when at [, go one level deep, and advance to the given index. + // if successful, we are left pointing at the value, + // if not, we are still pointing at the array ([) + inline bool move_to_index(uint32_t index); + + // Moves the iterator to the value corresponding to the json pointer. + // Always search from the root of the document. + // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer follows the rfc6901 standard's syntax: + // https://tools.ietf.org/html/rfc6901 However, the standard says "If a + // referenced member name is not unique in an object, the member that is + // referenced is undefined, and evaluation fails". Here we just return the + // first corresponding value. The length parameter is the length of the + // jsonpointer string ('pointer'). + inline bool move_to(const char *pointer, uint32_t length); + + // Moves the iterator to the value corresponding to the json pointer. + // Always search from the root of the document. 
+ // if successful, we are left pointing at the value, + // if not, we are still pointing the same value we were pointing before the + // call. The json pointer implementation follows the rfc6901 standard's + // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says + // "If a referenced member name is not unique in an object, the member that + // is referenced is undefined, and evaluation fails". Here we just return + // the first corresponding value. + inline bool move_to(const std::string &pointer); + + private: + // Almost the same as move_to(), except it searches from the current + // position. The pointer's syntax is identical, though that case is not + // handled by the rfc6901 standard. The '/' is still required at the + // beginning. However, contrary to move_to(), the URI Fragment Identifier + // Representation is not supported here. Also, in case of failure, we are + // left pointing at the closest value it could reach. For these reasons it + // is private. It exists because it is used by move_to(). + inline bool relative_move_to(const char *pointer, uint32_t length); + + public: + // throughout return true if we can do the navigation, false + // otherwise + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move forward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, { + // and [. At the object ({) or at the array ([), you can issue a "down" to + // visit their content. valid if we're not at the end of a scope (returns + // true). + inline bool next(); + + // Within a given scope (series of nodes at the same depth within either an + // array or an object), we move backward. + // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true + // when starting at the end of the scope. At the object ({) or at the array + // ([), you can issue a "down" to visit their content. + // Performance warning: This function is implemented by starting again + // from the beginning of the scope and scanning forward. You should expect + // it to be relatively slow. + inline bool prev(); + + // Moves back to either the containing array or object (type { or [) from + // within a contained scope. + // Valid unless we are at the first level of the document + inline bool up(); + + // Valid if we're at a [ or { and it starts a non-empty scope; moves us to + // start of that deeper scope if it not empty. Thus, given [true, null, + // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node. 
+ inline bool down(); + + // move us to the start of our current scope, + // a scope is a series of nodes at the same level + inline void to_start_scope(); + + inline void rewind(); + + + + // print the node we are currently pointing at + inline bool print(std::ostream &os, bool escape_strings = true) const; + + private: + const document &doc; + size_t max_depth{}; + size_t depth{}; + size_t location{}; // our current location on a tape + size_t tape_length{}; + uint8_t current_type{}; + uint64_t current_val{}; + typedef struct { + size_t start_of_scope; + uint8_t scope_type; + } scopeindex_t; + + scopeindex_t *depth_index{}; +}; + +} // namespace simdjson +#endif // SIMDJSON_DISABLE_DEPRECATED_API + +#endif // SIMDJSON_DOM_PARSEDJSON_ITERATOR_H +/* end file simdjson/dom/parsedjson_iterator.h */ + +// Inline functions +/* including simdjson/dom/array-inl.h: #include "simdjson/dom/array-inl.h" */ +/* begin file simdjson/dom/array-inl.h */ +#ifndef SIMDJSON_ARRAY_INL_H +#define SIMDJSON_ARRAY_INL_H + +#include + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/array.h" */ +/* skipped duplicate #include "simdjson/dom/element.h" */ +/* skipped duplicate #include "simdjson/error-inl.h" */ +/* including simdjson/internal/tape_ref-inl.h: #include "simdjson/internal/tape_ref-inl.h" */ +/* begin file simdjson/internal/tape_ref-inl.h */ +#ifndef SIMDJSON_TAPE_REF_INL_H +#define SIMDJSON_TAPE_REF_INL_H + +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + +#include + +namespace simdjson { +namespace internal { + +constexpr const uint64_t JSON_VALUE_MASK = 0x00FFFFFFFFFFFFFF; +constexpr const uint32_t JSON_COUNT_MASK = 0xFFFFFF; + +// +// tape_ref inline implementation +// +simdjson_inline tape_ref::tape_ref() noexcept : doc{nullptr}, json_index{0} {} +simdjson_inline tape_ref::tape_ref(const dom::document *_doc, size_t _json_index) noexcept : doc{_doc}, json_index{_json_index} {} + + +simdjson_inline bool tape_ref::is_document_root() const noexcept { + return json_index == 1; // should we ever change the structure of the tape, this should get updated. +} +simdjson_inline bool tape_ref::usable() const noexcept { + return doc != nullptr; // when the document pointer is null, this tape_ref is uninitialized (should not be accessed). +} +// Some value types have a specific on-tape word value. It can be faster +// to check the type by doing a word-to-word comparison instead of extracting the +// most significant 8 bits. 
+ +simdjson_inline bool tape_ref::is_double() const noexcept { + constexpr uint64_t tape_double = uint64_t(tape_type::DOUBLE)<<56; + return doc->tape[json_index] == tape_double; +} +simdjson_inline bool tape_ref::is_int64() const noexcept { + constexpr uint64_t tape_int64 = uint64_t(tape_type::INT64)<<56; + return doc->tape[json_index] == tape_int64; +} +simdjson_inline bool tape_ref::is_uint64() const noexcept { + constexpr uint64_t tape_uint64 = uint64_t(tape_type::UINT64)<<56; + return doc->tape[json_index] == tape_uint64; +} +simdjson_inline bool tape_ref::is_false() const noexcept { + constexpr uint64_t tape_false = uint64_t(tape_type::FALSE_VALUE)<<56; + return doc->tape[json_index] == tape_false; +} +simdjson_inline bool tape_ref::is_true() const noexcept { + constexpr uint64_t tape_true = uint64_t(tape_type::TRUE_VALUE)<<56; + return doc->tape[json_index] == tape_true; +} +simdjson_inline bool tape_ref::is_null_on_tape() const noexcept { + constexpr uint64_t tape_null = uint64_t(tape_type::NULL_VALUE)<<56; + return doc->tape[json_index] == tape_null; +} + +inline size_t tape_ref::after_element() const noexcept { + switch (tape_ref_type()) { + case tape_type::START_ARRAY: + case tape_type::START_OBJECT: + return matching_brace_index(); + case tape_type::UINT64: + case tape_type::INT64: + case tape_type::DOUBLE: + return json_index + 2; + default: + return json_index + 1; + } +} +simdjson_inline tape_type tape_ref::tape_ref_type() const noexcept { + return static_cast(doc->tape[json_index] >> 56); +} +simdjson_inline uint64_t internal::tape_ref::tape_value() const noexcept { + return doc->tape[json_index] & internal::JSON_VALUE_MASK; +} +simdjson_inline uint32_t internal::tape_ref::matching_brace_index() const noexcept { + return uint32_t(doc->tape[json_index]); +} +simdjson_inline uint32_t internal::tape_ref::scope_count() const noexcept { + return uint32_t((doc->tape[json_index] >> 32) & internal::JSON_COUNT_MASK); +} + +template +simdjson_inline T tape_ref::next_tape_value() const noexcept { + static_assert(sizeof(T) == sizeof(uint64_t), "next_tape_value() template parameter must be 64-bit"); + // Though the following is tempting... + // return *reinterpret_cast(&doc->tape[json_index + 1]); + // It is not generally safe. It is safer, and often faster to rely + // on memcpy. Yes, it is uglier, but it is also encapsulated. 
+ T x; + std::memcpy(&x,&doc->tape[json_index + 1],sizeof(uint64_t)); + return x; +} + +simdjson_inline uint32_t internal::tape_ref::get_string_length() const noexcept { + size_t string_buf_index = size_t(tape_value()); + uint32_t len; + std::memcpy(&len, &doc->string_buf[string_buf_index], sizeof(len)); + return len; +} + +simdjson_inline const char * internal::tape_ref::get_c_str() const noexcept { + size_t string_buf_index = size_t(tape_value()); + return reinterpret_cast(&doc->string_buf[string_buf_index + sizeof(uint32_t)]); +} + +inline std::string_view internal::tape_ref::get_string_view() const noexcept { + return std::string_view( + get_c_str(), + get_string_length() + ); +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_TAPE_REF_INL_H +/* end file simdjson/internal/tape_ref-inl.h */ + +#include + +namespace simdjson { + +// +// simdjson_result inline implementation +// +simdjson_inline simdjson_result::simdjson_result() noexcept + : internal::simdjson_result_base() {} +simdjson_inline simdjson_result::simdjson_result(dom::array value) noexcept + : internal::simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : internal::simdjson_result_base(error) {} + +#if SIMDJSON_EXCEPTIONS + +inline dom::array::iterator simdjson_result::begin() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.begin(); +} +inline dom::array::iterator simdjson_result::end() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.end(); +} +inline size_t simdjson_result::size() const noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first.size(); +} + +#endif // SIMDJSON_EXCEPTIONS + +inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) const noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +inline simdjson_result simdjson_result::at(size_t index) const noexcept { + if (error()) { return error(); } + return first.at(index); +} + +namespace dom { + +// +// array inline implementation +// +simdjson_inline array::array() noexcept : tape{} {} +simdjson_inline array::array(const internal::tape_ref &_tape) noexcept : tape{_tape} {} +inline array::iterator array::begin() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.json_index + 1); +} +inline array::iterator array::end() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return internal::tape_ref(tape.doc, tape.after_element() - 1); +} +inline size_t array::size() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.scope_count(); +} +inline size_t array::number_of_slots() const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + return tape.matching_brace_index() - tape.json_index; +} +inline simdjson_result array::at_pointer(std::string_view json_pointer) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + if(json_pointer.empty()) { // an empty string means that we return the current node + return element(this->tape); // copy the current node + } else if(json_pointer[0] != '/') { // otherwise there is an error + return 
INVALID_JSON_POINTER; + } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + + // Get the child + auto child = array(tape).at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +inline simdjson_result array::at(size_t index) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + size_t i=0; + for (auto element : *this) { + if (i == index) { return element; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +// +// array::iterator inline implementation +// +simdjson_inline array::iterator::iterator(const internal::tape_ref &_tape) noexcept : tape{_tape} { } +inline element array::iterator::operator*() const noexcept { + return element(tape); +} +inline array::iterator& array::iterator::operator++() noexcept { + tape.json_index = tape.after_element(); + return *this; +} +inline array::iterator array::iterator::operator++(int) noexcept { + array::iterator out = *this; + ++*this; + return out; +} +inline bool array::iterator::operator!=(const array::iterator& other) const noexcept { + return tape.json_index != other.tape.json_index; +} +inline bool array::iterator::operator==(const array::iterator& other) const noexcept { + return tape.json_index == other.tape.json_index; +} +inline bool array::iterator::operator<(const array::iterator& other) const noexcept { + return tape.json_index < other.tape.json_index; +} +inline bool array::iterator::operator<=(const array::iterator& other) const noexcept { + return tape.json_index <= other.tape.json_index; +} +inline bool array::iterator::operator>=(const array::iterator& other) const noexcept { + return tape.json_index >= other.tape.json_index; +} +inline bool array::iterator::operator>(const array::iterator& other) const noexcept { + return tape.json_index > other.tape.json_index; +} + +} // namespace dom + + +} // namespace simdjson + +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ + +#if defined(__cpp_lib_ranges) +static_assert(std::ranges::view); +static_assert(std::ranges::sized_range); +#if SIMDJSON_EXCEPTIONS +static_assert(std::ranges::view>); +static_assert(std::ranges::sized_range>); +#endif // SIMDJSON_EXCEPTIONS +#endif // defined(__cpp_lib_ranges) + +#endif // SIMDJSON_ARRAY_INL_H +/* end file simdjson/dom/array-inl.h */ +/* skipped duplicate #include "simdjson/dom/document_stream-inl.h" */ +/* including simdjson/dom/document-inl.h: #include 
"simdjson/dom/document-inl.h" */ +/* begin file simdjson/dom/document-inl.h */ +#ifndef SIMDJSON_DOCUMENT_INL_H +#define SIMDJSON_DOCUMENT_INL_H + +// Inline implementations go in here. + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/document.h" */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +/* including simdjson/internal/jsonformatutils.h: #include "simdjson/internal/jsonformatutils.h" */ +/* begin file simdjson/internal/jsonformatutils.h */ +#ifndef SIMDJSON_INTERNAL_JSONFORMATUTILS_H +#define SIMDJSON_INTERNAL_JSONFORMATUTILS_H + +/* skipped duplicate #include "simdjson/base.h" */ +#include +#include +#include + +namespace simdjson { +namespace internal { + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &str); + +class escape_json_string { +public: + escape_json_string(std::string_view _str) noexcept : str{_str} {} + operator std::string() const noexcept { std::stringstream s; s << *this; return s.str(); } +private: + std::string_view str; + friend std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped); +}; + +inline std::ostream& operator<<(std::ostream& out, const escape_json_string &unescaped) { + for (size_t i=0; i(unescaped.str[i]) <= 0x1F) { + // TODO can this be done once at the beginning, or will it mess up << char? + std::ios::fmtflags f(out.flags()); + out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << int(unescaped.str[i]); + out.flags(f); + } else { + out << unescaped.str[i]; + } + } + } + return out; +} + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONFORMATUTILS_H +/* end file simdjson/internal/jsonformatutils.h */ + +#include + +namespace simdjson { +namespace dom { + +// +// document inline implementation +// +inline element document::root() const noexcept { + return element(internal::tape_ref(this, 1)); +} +simdjson_warn_unused +inline size_t document::capacity() const noexcept { + return allocated_capacity; +} + +simdjson_warn_unused +inline error_code document::allocate(size_t capacity) noexcept { + if (capacity == 0) { + string_buf.reset(); + tape.reset(); + allocated_capacity = 0; + return SUCCESS; + } + + // a pathological input like "[[[[..." would generate capacity tape elements, so + // need a capacity of at least capacity + 1, but it is also possible to do + // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6" + //where capacity + 1 tape elements are + // generated, see issue https://github.com/simdjson/simdjson/issues/345 + size_t tape_capacity = SIMDJSON_ROUNDUP_N(capacity + 3, 64); + // a document with only zero-length strings... could have capacity/3 string + // and we would need capacity/3 * 5 bytes on the string buffer + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset( new (std::nothrow) uint8_t[string_capacity]); + tape.reset(new (std::nothrow) uint64_t[tape_capacity]); + if(!(string_buf && tape)) { + allocated_capacity = 0; + string_buf.reset(); + tape.reset(); + return MEMALLOC; + } + // Technically the allocated_capacity might be larger than capacity + // so the next line is pessimistic. 
+ allocated_capacity = capacity; + return SUCCESS; +} + +inline bool document::dump_raw_tape(std::ostream &os) const noexcept { + uint32_t string_length; + size_t tape_idx = 0; + uint64_t tape_val = tape[tape_idx]; + uint8_t type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type; + tape_idx++; + size_t how_many = 0; + if (type == 'r') { + how_many = size_t(tape_val & internal::JSON_VALUE_MASK); + } else { + // Error: no starting root node? + return false; + } + os << "\t// pointing to " << how_many << " (right after last node)\n"; + uint64_t payload; + for (; tape_idx < how_many; tape_idx++) { + os << tape_idx << " : "; + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + switch (type) { + case '"': // we have a string + os << "string \""; + std::memcpy(&string_length, string_buf.get() + payload, sizeof(uint32_t)); + os << internal::escape_json_string(std::string_view( + reinterpret_cast(string_buf.get() + payload + sizeof(uint32_t)), + string_length + )); + os << '"'; + os << '\n'; + break; + case 'l': // we have a long int + if (tape_idx + 1 >= how_many) { + return false; + } + os << "integer " << static_cast(tape[++tape_idx]) << "\n"; + break; + case 'u': // we have a long uint + if (tape_idx + 1 >= how_many) { + return false; + } + os << "unsigned integer " << tape[++tape_idx] << "\n"; + break; + case 'd': // we have a double + os << "float "; + if (tape_idx + 1 >= how_many) { + return false; + } + double answer; + std::memcpy(&answer, &tape[++tape_idx], sizeof(answer)); + os << answer << '\n'; + break; + case 'n': // we have a null + os << "null\n"; + break; + case 't': // we have a true + os << "true\n"; + break; + case 'f': // we have a false + os << "false\n"; + break; + case '{': // we have an object + os << "{\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; case '}': // we end an object + os << "}\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case '[': // we start an array + os << "[\t// pointing to next tape location " << uint32_t(payload) + << " (first node after the scope), " + << " saturated count " + << ((payload >> 32) & internal::JSON_COUNT_MASK)<< "\n"; + break; + case ']': // we end an array + os << "]\t// pointing to previous tape location " << uint32_t(payload) + << " (start of the scope)\n"; + break; + case 'r': // we start and end with the root node + // should we be hitting the root node? 
+ return false; + default: + return false; + } + } + tape_val = tape[tape_idx]; + payload = tape_val & internal::JSON_VALUE_MASK; + type = uint8_t(tape_val >> 56); + os << tape_idx << " : " << type << "\t// pointing to " << payload + << " (start root)\n"; + return true; +} + +} // namespace dom +} // namespace simdjson + +#endif // SIMDJSON_DOCUMENT_INL_H +/* end file simdjson/dom/document-inl.h */ +/* skipped duplicate #include "simdjson/dom/element-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* including simdjson/dom/parsedjson_iterator-inl.h: #include "simdjson/dom/parsedjson_iterator-inl.h" */ +/* begin file simdjson/dom/parsedjson_iterator-inl.h */ +#ifndef SIMDJSON_PARSEDJSON_ITERATOR_INL_H +#define SIMDJSON_PARSEDJSON_ITERATOR_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/parsedjson_iterator.h" */ +/* skipped duplicate #include "simdjson/internal/jsonformatutils.h" */ + +/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ + +#include +#include +#include +#include + +#ifndef SIMDJSON_DISABLE_DEPRECATED_API + +namespace simdjson { + +// VS2017 reports deprecated warnings when you define a deprecated class's methods. +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_DEPRECATED_WARNING + +// Because of template weirdness, the actual class definition is inline in the document class +simdjson_warn_unused bool dom::parser::Iterator::is_ok() const { + return location < tape_length; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_location() const { + return location; +} + +// useful for debugging purposes +size_t dom::parser::Iterator::get_tape_length() const { + return tape_length; +} + +// returns the current depth (start at 1 with 0 reserved for the fictitious root +// node) +size_t dom::parser::Iterator::get_depth() const { + return depth; +} + +// A scope is a series of nodes at the same depth, typically it is either an +// object ({) or an array ([). The root node has type 'r'. +uint8_t dom::parser::Iterator::get_scope_type() const { + return depth_index[depth].scope_type; +} + +bool dom::parser::Iterator::move_forward() { + if (location + 1 >= tape_length) { + return false; // we are at the end! + } + + if ((current_type == '[') || (current_type == '{')) { + // We are entering a new scope + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } else if ((current_type == ']') || (current_type == '}')) { + // Leaving a scope. + depth--; + } else if (is_number()) { + // these types use 2 locations on the tape, not just one. + location += 1; + } + + location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +void dom::parser::Iterator::move_to_value() { + // assume that we are on a key, so move by 1. 
+ location += 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +bool dom::parser::Iterator::move_to_key(const char *key) { + if (down()) { + do { + const bool right_key = (strcmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key_insensitive( + const char *key) { + if (down()) { + do { + const bool right_key = (simdjson_strcasecmp(get_string(), key) == 0); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_key(const char *key, + uint32_t length) { + if (down()) { + do { + bool right_key = ((get_string_length() == length) && + (memcmp(get_string(), key, length) == 0)); + move_to_value(); + if (right_key) { + return true; + } + } while (next()); + up(); + } + return false; +} + +bool dom::parser::Iterator::move_to_index(uint32_t index) { + if (down()) { + uint32_t i = 0; + for (; i < index; i++) { + if (!next()) { + break; + } + } + if (i == index) { + return true; + } + up(); + } + return false; +} + +bool dom::parser::Iterator::prev() { + size_t target_location = location; + to_start_scope(); + size_t npos = location; + if (target_location == npos) { + return false; // we were already at the start + } + size_t oldnpos; + // we have that npos < target_location here + do { + oldnpos = npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = npos + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + } while (npos < target_location); + location = oldnpos; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::up() { + if (depth == 1) { + return false; // don't allow moving back to root + } + to_start_scope(); + // next we just move to the previous value + depth--; + location -= 1; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; +} + +bool dom::parser::Iterator::down() { + if (location + 1 >= tape_length) { + return false; + } + if ((current_type == '[') || (current_type == '{')) { + size_t npos = uint32_t(current_val); + if (npos == location + 2) { + return false; // we have an empty scope + } + depth++; + assert(depth < max_depth); + location = location + 1; + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + return true; + } + return false; +} + +void dom::parser::Iterator::to_start_scope() { + location = depth_index[depth].start_of_scope; + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); +} + +inline void dom::parser::Iterator::rewind() { + while (up()) + ; +} + + +bool dom::parser::Iterator::next() { + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = location + (is_number() ? 
2 : 1); + } + uint64_t next_val = doc.tape[npos]; + uint8_t next_type = uint8_t(next_val >> 56); + if ((next_type == ']') || (next_type == '}')) { + return false; // we reached the end of the scope + } + location = npos; + current_val = next_val; + current_type = next_type; + return true; +} +dom::parser::Iterator::Iterator(const dom::parser &pj) noexcept(false) + : doc(pj.doc) +{ +#if SIMDJSON_EXCEPTIONS + if (!pj.valid) { throw simdjson_error(pj.error); } +#else + if (!pj.valid) { return; } // abort() usage is forbidden in the library +#endif + + max_depth = pj.max_depth(); + depth_index = new scopeindex_t[max_depth + 1]; + depth_index[0].start_of_scope = location; + current_val = doc.tape[location++]; + current_type = uint8_t(current_val >> 56); + depth_index[0].scope_type = current_type; + tape_length = size_t(current_val & internal::JSON_VALUE_MASK); + if (location < tape_length) { + // If we make it here, then depth_capacity must >=2, but the compiler + // may not know this. + current_val = doc.tape[location]; + current_type = uint8_t(current_val >> 56); + depth++; + assert(depth < max_depth); + depth_index[depth].start_of_scope = location; + depth_index[depth].scope_type = current_type; + } +} +dom::parser::Iterator::Iterator( + const dom::parser::Iterator &o) noexcept + : doc(o.doc), + max_depth(o.depth), + depth(o.depth), + location(o.location), + tape_length(o.tape_length), + current_type(o.current_type), + current_val(o.current_val) +{ + depth_index = new scopeindex_t[max_depth+1]; + std::memcpy(depth_index, o.depth_index, (depth + 1) * sizeof(depth_index[0])); +} + +dom::parser::Iterator::~Iterator() noexcept { + if (depth_index) { delete[] depth_index; } +} + +bool dom::parser::Iterator::print(std::ostream &os, bool escape_strings) const { + if (!is_ok()) { + return false; + } + switch (current_type) { + case '"': // we have a string + os << '"'; + if (escape_strings) { + os << internal::escape_json_string(std::string_view(get_string(), get_string_length())); + } else { + // was: os << get_string();, but given that we can include null chars, we + // have to do something crazier: + std::copy(get_string(), get_string() + get_string_length(), std::ostream_iterator(os)); + } + os << '"'; + break; + case 'l': // we have a long int + os << get_integer(); + break; + case 'u': + os << get_unsigned_integer(); + break; + case 'd': + os << get_double(); + break; + case 'n': // we have a null + os << "null"; + break; + case 't': // we have a true + os << "true"; + break; + case 'f': // we have a false + os << "false"; + break; + case '{': // we have an object + case '}': // we end an object + case '[': // we start an array + case ']': // we end an array + os << char(current_type); + break; + default: + return false; + } + return true; +} + +bool dom::parser::Iterator::move_to(const char *pointer, + uint32_t length) { + char *new_pointer = nullptr; + if (pointer[0] == '#') { + // Converting fragment representation to string representation + new_pointer = new char[length]; + uint32_t new_length = 0; + for (uint32_t i = 1; i < length; i++) { + if (pointer[i] == '%' && pointer[i + 1] == 'x') { +#if __cpp_exceptions + try { +#endif + int fragment = + std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16); + if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) { + // escaping the character + new_pointer[new_length] = '\\'; + new_length++; + } + new_pointer[new_length] = char(fragment); + i += 3; +#if __cpp_exceptions + } catch (std::invalid_argument &) { + delete[] new_pointer; + 
return false; // the fragment is invalid + } +#endif + } else { + new_pointer[new_length] = pointer[i]; + } + new_length++; + } + length = new_length; + pointer = new_pointer; + } + + // saving the current state + size_t depth_s = depth; + size_t location_s = location; + uint8_t current_type_s = current_type; + uint64_t current_val_s = current_val; + + rewind(); // The json pointer is used from the root of the document. + + bool found = relative_move_to(pointer, length); + delete[] new_pointer; + + if (!found) { + // since the pointer has found nothing, we get back to the original + // position. + depth = depth_s; + location = location_s; + current_type = current_type_s; + current_val = current_val_s; + } + + return found; +} + +inline bool dom::parser::Iterator::move_to(const std::string &pointer) { + return move_to(pointer.c_str(), uint32_t(pointer.length())); +} + +inline int64_t dom::parser::Iterator::get_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return static_cast(doc.tape[location + 1]); +} + +inline uint64_t dom::parser::Iterator::get_unsigned_integer() const { + if (location + 1 >= tape_length) { + return 0; // default value in case of error + } + return doc.tape[location + 1]; +} + +inline const char * dom::parser::Iterator::get_string() const { + return reinterpret_cast( + doc.string_buf.get() + (current_val & internal::JSON_VALUE_MASK) + sizeof(uint32_t)); +} + +inline uint32_t dom::parser::Iterator::get_string_length() const { + uint32_t answer; + std::memcpy(&answer, + reinterpret_cast(doc.string_buf.get() + + (current_val & internal::JSON_VALUE_MASK)), + sizeof(uint32_t)); + return answer; +} + +inline double dom::parser::Iterator::get_double() const { + if (location + 1 >= tape_length) { + return std::numeric_limits::quiet_NaN(); // default value in + // case of error + } + double answer; + std::memcpy(&answer, &doc.tape[location + 1], sizeof(answer)); + return answer; +} + +bool dom::parser::Iterator::relative_move_to(const char *pointer, + uint32_t length) { + if (length == 0) { + // returns the whole document + return true; + } + + if (pointer[0] != '/') { + // '/' must be the first character + return false; + } + + // finding the key in an object or the index in an array + std::string key_or_index; + uint32_t offset = 1; + + // checking for the "-" case + if (is_array() && pointer[1] == '-') { + if (length != 2) { + // the pointer must be exactly "/-" + // there can't be anything more after '-' as an index + return false; + } + key_or_index = '-'; + offset = length; // will skip the loop coming right after + } + + // We either transform the first reference token to a valid json key + // or we make sure it is a valid index in an array. 
+ for (; offset < length; offset++) { + if (pointer[offset] == '/') { + // beginning of the next key or index + break; + } + if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) { + // the index of an array must be an integer + // we also make sure std::stoi won't discard whitespaces later + return false; + } + if (pointer[offset] == '~') { + // "~1" represents "/" + if (pointer[offset + 1] == '1') { + key_or_index += '/'; + offset++; + continue; + } + // "~0" represents "~" + if (pointer[offset + 1] == '0') { + key_or_index += '~'; + offset++; + continue; + } + } + if (pointer[offset] == '\\') { + if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' || + (pointer[offset + 1] <= 0x1F)) { + key_or_index += pointer[offset + 1]; + offset++; + continue; + } + return false; // invalid escaped character + } + if (pointer[offset] == '\"') { + // unescaped quote character. this is an invalid case. + // lets do nothing and assume most pointers will be valid. + // it won't find any corresponding json key anyway. + // return false; + } + key_or_index += pointer[offset]; + } + + bool found = false; + if (is_object()) { + if (move_to_key(key_or_index.c_str(), uint32_t(key_or_index.length()))) { + found = relative_move_to(pointer + offset, length - offset); + } + } else if (is_array()) { + if (key_or_index == "-") { // handling "-" case first + if (down()) { + while (next()) + ; // moving to the end of the array + // moving to the nonexistent value right after... + size_t npos; + if ((current_type == '[') || (current_type == '{')) { + // we need to jump + npos = uint32_t(current_val); + } else { + npos = + location + ((current_type == 'd' || current_type == 'l') ? 2 : 1); + } + location = npos; + current_val = doc.tape[npos]; + current_type = uint8_t(current_val >> 56); + return true; // how could it fail ? 
+ } + } else { // regular numeric index + // The index can't have a leading '0' + if (key_or_index[0] == '0' && key_or_index.length() > 1) { + return false; + } + // it cannot be empty + if (key_or_index.length() == 0) { + return false; + } + // we already checked the index contains only valid digits + uint32_t index = std::stoi(key_or_index); + if (move_to_index(index)) { + found = relative_move_to(pointer + offset, length - offset); + } + } + } + + return found; +} + +SIMDJSON_POP_DISABLE_WARNINGS +} // namespace simdjson + +#endif // SIMDJSON_DISABLE_DEPRECATED_API + + +#endif // SIMDJSON_PARSEDJSON_ITERATOR_INL_H +/* end file simdjson/dom/parsedjson_iterator-inl.h */ +/* skipped duplicate #include "simdjson/dom/parser-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ +/* including simdjson/dom/serialization-inl.h: #include "simdjson/dom/serialization-inl.h" */ +/* begin file simdjson/dom/serialization-inl.h */ + +#ifndef SIMDJSON_SERIALIZATION_INL_H +#define SIMDJSON_SERIALIZATION_INL_H + +/* skipped duplicate #include "simdjson/dom/base.h" */ +/* skipped duplicate #include "simdjson/dom/serialization.h" */ +/* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/internal/tape_type.h" */ + +/* skipped duplicate #include "simdjson/dom/array-inl.h" */ +/* skipped duplicate #include "simdjson/dom/object-inl.h" */ +/* skipped duplicate #include "simdjson/internal/tape_ref-inl.h" */ + +#include + +namespace simdjson { +namespace dom { +inline bool parser::print_json(std::ostream &os) const noexcept { + if (!valid) { return false; } + simdjson::internal::string_builder<> sb; + sb.append(doc.root()); + std::string_view answer = sb.str(); + os << answer; + return true; +} + +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); +} +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#endif + +} // namespace dom + +/*** + * Number utility functions + **/ +namespace { +/**@private + * Escape sequence like \b or \u0001 + * We expect that most compilers will use 8 bytes for this data structure. + **/ +struct escape_sequence { + uint8_t length; + const char string[7]; // technically, we only ever need 6 characters, we pad to 8 +}; +/**@private + * This converts a signed integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 20 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. 
The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, int64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + uint64_t value_positive; + // In general, negating a signed integer is unsafe. + if(value < 0) { + *output++ = '-'; + // Doing value_positive = -value; while avoiding + // undefined behavior warnings. + // It assumes two complement's which is universal at this + // point in time. + std::memcpy(&value_positive, &value, sizeof(value)); + value_positive = (~value_positive) + 1; // this is a negation + } else { + value_positive = value; + } + // We work solely with value_positive. It *might* be easier + // for an optimizing compiler to deal with an unsigned variable + // as far as performance goes. + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value_positive >= 10) { + *write_pointer-- = char('0' + (value_positive % 10)); + value_positive /= 10; + } + *write_pointer = char('0' + value_positive); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} +/**@private + * This converts an unsigned integer into a character sequence. + * The caller is responsible for providing enough memory (at least + * 19 characters.) + * Though various runtime libraries provide itoa functions, + * it is not part of the C++ standard. The C++17 standard + * adds the to_chars functions which would do as well, but + * we want to support C++11. + */ +static char *fast_itoa(char *output, uint64_t value) noexcept { + // This is a standard implementation of itoa. + char buffer[20]; + const char *const end_buffer = buffer + 20; + char *write_pointer = buffer + 19; + // A faster approach is possible if we expect large integers: + // unroll the loop (work in 100s, 1000s) and use some kind of + // memoization. + while(value >= 10) { + *write_pointer-- = char('0' + (value % 10)); + value /= 10; + }; + *write_pointer = char('0' + value); + size_t len = end_buffer - write_pointer; + std::memcpy(output, write_pointer, len); + return output + len; +} + + +} // anonymous namespace +namespace internal { + +/*** + * Minifier/formatter code. + **/ + +template +simdjson_inline void base_formatter::number(uint64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::number(int64_t x) { + char number_buffer[24]; + char *newp = fast_itoa(number_buffer, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::number(double x) { + char number_buffer[24]; + // Currently, passing the nullptr to the second argument is + // safe because our implementation does not check the second + // argument. 
+ char *newp = internal::to_chars(number_buffer, nullptr, x); + buffer.insert(buffer.end(), number_buffer, newp); +} + +template +simdjson_inline void base_formatter::start_array() { one_char('['); } + + +template +simdjson_inline void base_formatter::end_array() { one_char(']'); } + +template +simdjson_inline void base_formatter::start_object() { one_char('{'); } + +template +simdjson_inline void base_formatter::end_object() { one_char('}'); } + +template +simdjson_inline void base_formatter::comma() { one_char(','); } + +template +simdjson_inline void base_formatter::true_atom() { + const char * s = "true"; + buffer.insert(buffer.end(), s, s + 4); +} + +template +simdjson_inline void base_formatter::false_atom() { + const char * s = "false"; + buffer.insert(buffer.end(), s, s + 5); +} + +template +simdjson_inline void base_formatter::null_atom() { + const char * s = "null"; + buffer.insert(buffer.end(), s, s + 4); +} + +template +simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } + +template +simdjson_inline void base_formatter::key(std::string_view unescaped) { + string(unescaped); + one_char(':'); +} + +template +simdjson_inline void base_formatter::string(std::string_view unescaped) { + one_char('\"'); + size_t i = 0; + // Fast path for the case where we have no control character, no ", and no backslash. + // This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise operation + // with bool types. + constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for(;i + 8 <= unescaped.length(); i += 8) { + // Poor's man vectorization. This could get much faster if we used SIMD. + // + // It is not the case that replacing '|' with '||' would be neutral performance-wise. + if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] + | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] + | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] + | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] + ) { break; } + } + for(;i < unescaped.length(); i++) { + if(needs_escaping[uint8_t(unescaped[i])]) { break; } + } + // The following is also possible and omits a 256-byte table, but it is slower: + // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} + + // At least for long strings, the following should be fast. We could + // do better by integrating the checks and the insertion. + buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + // We caught a control character if we enter this loop (slow). 
+ // Note that we are do not restart from the beginning, but rather we continue + // from the point where we encountered something that requires escaping. + for (; i < unescaped.length(); i++) { + switch (unescaped[i]) { + case '\"': + { + const char * s = "\\\""; + buffer.insert(buffer.end(), s, s + 2); + } + break; + case '\\': + { + const char * s = "\\\\"; + buffer.insert(buffer.end(), s, s + 2); + } + break; + default: + if (uint8_t(unescaped[i]) <= 0x1F) { + // If packed, this uses 8 * 32 bytes. + // Note that we expect most compilers to embed this code in the data + // section. + constexpr static escape_sequence escaped[32] = { + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + auto u = escaped[uint8_t(unescaped[i])]; + buffer.insert(buffer.end(), u.string, u.string + u.length); + } else { + one_char(unescaped[i]); + } + } // switch + } // for + one_char('\"'); +} + + +template +inline void base_formatter::clear() { + buffer.clear(); +} + +template +simdjson_inline std::string_view base_formatter::str() const { + return std::string_view(buffer.data(), buffer.size()); +} + +simdjson_inline void mini_formatter::print_newline() { + return; +} + +simdjson_inline void mini_formatter::print_indents(size_t depth) { + (void)depth; + return; +} + +simdjson_inline void mini_formatter::print_space() { + return; +} + +simdjson_inline void pretty_formatter::print_newline() { + one_char('\n'); +} + +simdjson_inline void pretty_formatter::print_indents(size_t depth) { + if(this->indent_step <= 0) { + return; + } + for(size_t i = 0; i < this->indent_step * depth; i++) { + one_char(' '); + } +} + +simdjson_inline void pretty_formatter::print_space() { + one_char(' '); +} + +/*** + * String building code. + **/ + +template +inline void string_builder::append(simdjson::dom::element value) { + // using tape_type = simdjson::internal::tape_type; + size_t depth = 0; + constexpr size_t MAX_DEPTH = 16; + bool is_object[MAX_DEPTH]; + is_object[0] = false; + bool after_value = false; + + internal::tape_ref iter(value.tape); + do { + // print commas after each value + if (after_value) { + format.comma(); + format.print_newline(); + } + + format.print_indents(depth); + + // If we are in an object, print the next key and :, and skip to the next + // value. + if (is_object[depth]) { + format.key(iter.get_string_view()); + format.print_space(); + iter.json_index++; + } + switch (iter.tape_ref_type()) { + + // Arrays + case tape_type::START_ARRAY: { + // If we're too deep, we need to recurse to go deeper. 
+      depth++;
+      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
+        append(simdjson::dom::array(iter));
+        iter.json_index = iter.matching_brace_index() - 1; // Jump to the ]
+        depth--;
+        break;
+      }
+
+      // Output start [
+      format.start_array();
+      iter.json_index++;
+
+      // Handle empty [] (we don't want to come back around and print commas)
+      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
+        format.end_array();
+        depth--;
+        break;
+      }
+
+      is_object[depth] = false;
+      after_value = false;
+      format.print_newline();
+      continue;
+    }
+
+    // Objects
+    case tape_type::START_OBJECT: {
+      // If we're too deep, we need to recurse to go deeper.
+      depth++;
+      if (simdjson_unlikely(depth >= MAX_DEPTH)) {
+        append(simdjson::dom::object(iter));
+        iter.json_index = iter.matching_brace_index() - 1; // Jump to the }
+        depth--;
+        break;
+      }
+
+      // Output start {
+      format.start_object();
+      iter.json_index++;
+
+      // Handle empty {} (we don't want to come back around and print commas)
+      if (iter.tape_ref_type() == tape_type::END_OBJECT) {
+        format.end_object();
+        depth--;
+        break;
+      }
+
+      is_object[depth] = true;
+      after_value = false;
+      format.print_newline();
+      continue;
+    }
+
+    // Scalars
+    case tape_type::STRING:
+      format.string(iter.get_string_view());
+      break;
+    case tape_type::INT64:
+      format.number(iter.next_tape_value<int64_t>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::UINT64:
+      format.number(iter.next_tape_value<uint64_t>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::DOUBLE:
+      format.number(iter.next_tape_value<double>());
+      iter.json_index++; // numbers take up 2 spots, so we need to increment
+                         // extra
+      break;
+    case tape_type::TRUE_VALUE:
+      format.true_atom();
+      break;
+    case tape_type::FALSE_VALUE:
+      format.false_atom();
+      break;
+    case tape_type::NULL_VALUE:
+      format.null_atom();
+      break;
+
+    // These are impossible
+    case tape_type::END_ARRAY:
+    case tape_type::END_OBJECT:
+    case tape_type::ROOT:
+      SIMDJSON_UNREACHABLE();
+    }
+    iter.json_index++;
+    after_value = true;
+
+    // Handle multiple ends in a row
+    while (depth != 0 && (iter.tape_ref_type() == tape_type::END_ARRAY ||
+                          iter.tape_ref_type() == tape_type::END_OBJECT)) {
+      format.print_newline();
+      depth--;
+      format.print_indents(depth);
+      if (iter.tape_ref_type() == tape_type::END_ARRAY) {
+        format.end_array();
+      } else {
+        format.end_object();
+      }
+      iter.json_index++;
+    }
+
+    // Stop when we're at depth 0
+  } while (depth != 0);
+
+  format.print_newline();
+}
+
+template <class serializer>
+inline void string_builder<serializer>::append(simdjson::dom::object value) {
+  format.start_object();
+  auto pair = value.begin();
+  auto end = value.end();
+  if (pair != end) {
+    append(*pair);
+    for (++pair; pair != end; ++pair) {
+      format.comma();
+      append(*pair);
+    }
+  }
+  format.end_object();
+}
+
+template <class serializer>
+inline void string_builder<serializer>::append(simdjson::dom::array value) {
+  format.start_array();
+  auto iter = value.begin();
+  auto end = value.end();
+  if (iter != end) {
+    append(*iter);
+    for (++iter; iter != end; ++iter) {
+      format.comma();
+      append(*iter);
+    }
+  }
+  format.end_array();
+}
+
+template <class serializer>
+simdjson_inline void string_builder<serializer>::append(simdjson::dom::key_value_pair kv) {
+  format.key(kv.key);
+  append(kv.value);
+}
+
+template <class serializer>
+simdjson_inline void string_builder<serializer>::clear() {
+  format.clear();
+}
+
+template <class serializer>
+simdjson_inline std::string_view string_builder<serializer>::str() const {
+  return format.str();
+}
+
+
+} // namespace internal
+} // namespace simdjson
+
+#endif
+/*
end file simdjson/dom/serialization-inl.h */ + +#endif // SIMDJSON_DOM_H +/* end file simdjson/dom.h */ +/* including simdjson/ondemand.h: #include "simdjson/ondemand.h" */ +/* begin file simdjson/ondemand.h */ +#ifndef SIMDJSON_ONDEMAND_H +#define SIMDJSON_ONDEMAND_H + +/* including simdjson/builtin/ondemand.h: #include "simdjson/builtin/ondemand.h" */ +/* begin file simdjson/builtin/ondemand.h */ +#ifndef SIMDJSON_BUILTIN_ONDEMAND_H +#define SIMDJSON_BUILTIN_ONDEMAND_H + +/* including simdjson/builtin.h: #include "simdjson/builtin.h" */ +/* begin file simdjson/builtin.h */ +#ifndef SIMDJSON_BUILTIN_H +#define SIMDJSON_BUILTIN_H + +/* including simdjson/builtin/base.h: #include "simdjson/builtin/base.h" */ +/* begin file simdjson/builtin/base.h */ +#ifndef SIMDJSON_BUILTIN_BASE_H +#define SIMDJSON_BUILTIN_BASE_H + +/* skipped duplicate #include "simdjson/base.h" */ +/* including simdjson/implementation_detection.h: #include "simdjson/implementation_detection.h" */ +/* begin file simdjson/implementation_detection.h */ +#ifndef SIMDJSON_IMPLEMENTATION_DETECTION_H +#define SIMDJSON_IMPLEMENTATION_DETECTION_H + +/* skipped duplicate #include "simdjson/base.h" */ + +// 0 is reserved, because undefined SIMDJSON_IMPLEMENTATION equals 0 in preprocessor macros. +#define SIMDJSON_IMPLEMENTATION_ID_arm64 1 +#define SIMDJSON_IMPLEMENTATION_ID_fallback 2 +#define SIMDJSON_IMPLEMENTATION_ID_haswell 3 +#define SIMDJSON_IMPLEMENTATION_ID_icelake 4 +#define SIMDJSON_IMPLEMENTATION_ID_ppc64 5 +#define SIMDJSON_IMPLEMENTATION_ID_westmere 6 + +#define SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) SIMDJSON_CAT(SIMDJSON_IMPLEMENTATION_ID_, IMPL) +#define SIMDJSON_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_IMPLEMENTATION) + +#define SIMDJSON_IMPLEMENTATION_IS(IMPL) SIMDJSON_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +// +// First, figure out which implementations can be run. Doing it here makes it so we don't have to worry about the order +// in which we include them. +// + +#ifndef SIMDJSON_IMPLEMENTATION_ARM64 +#define SIMDJSON_IMPLEMENTATION_ARM64 (SIMDJSON_IS_ARM64) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_ARM64 SIMDJSON_IMPLEMENTATION_ARM64 && SIMDJSON_IS_ARM64 + +// Default Icelake to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_ICELAKE +#define SIMDJSON_IMPLEMENTATION_ICELAKE ((SIMDJSON_IS_X86_64) && (SIMDJSON_AVX512_ALLOWED) && (SIMDJSON_COMPILER_SUPPORTS_VBMI2)) +#endif + +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_ICELAKE ((SIMDJSON_IMPLEMENTATION_ICELAKE) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__) && (__AVX512F__) && (__AVX512DQ__) && (__AVX512CD__) && (__AVX512BW__) && (__AVX512VL__) && (__AVX512VBMI2__)) +#endif + +// Default Haswell to on if this is x86-64. Even if we're not compiled for it, it could be selected +// at runtime. +#ifndef SIMDJSON_IMPLEMENTATION_HASWELL +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +// if icelake is always available, never enable haswell. 
+#define SIMDJSON_IMPLEMENTATION_HASWELL 0 +#else +#define SIMDJSON_IMPLEMENTATION_HASWELL SIMDJSON_IS_X86_64 +#endif +#endif +#ifdef _MSC_VER +// To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this next line, see +// https://github.com/simdjson/simdjson/issues/1247 +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__)) +#else +#define SIMDJSON_CAN_ALWAYS_RUN_HASWELL ((SIMDJSON_IMPLEMENTATION_HASWELL) && (SIMDJSON_IS_X86_64) && (__AVX2__) && (__BMI__) && (__PCLMUL__) && (__LZCNT__)) +#endif + +// Default Westmere to on if this is x86-64. +#ifndef SIMDJSON_IMPLEMENTATION_WESTMERE +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL +// if icelake or haswell are always available, never enable westmere. +#define SIMDJSON_IMPLEMENTATION_WESTMERE 0 +#else +#define SIMDJSON_IMPLEMENTATION_WESTMERE SIMDJSON_IS_X86_64 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_WESTMERE (SIMDJSON_IMPLEMENTATION_WESTMERE && SIMDJSON_IS_X86_64 && __SSE4_2__ && __PCLMUL__) + +#ifndef SIMDJSON_IMPLEMENTATION_PPC64 +#define SIMDJSON_IMPLEMENTATION_PPC64 (SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX) +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_PPC64 SIMDJSON_IMPLEMENTATION_PPC64 && SIMDJSON_IS_PPC64 && SIMDJSON_IS_PPC64_VMX + +// Default Fallback to on unless a builtin implementation has already been selected. +#ifndef SIMDJSON_IMPLEMENTATION_FALLBACK +#if SIMDJSON_CAN_ALWAYS_RUN_ARM64 || SIMDJSON_CAN_ALWAYS_RUN_ICELAKE || SIMDJSON_CAN_ALWAYS_RUN_HASWELL || SIMDJSON_CAN_ALWAYS_RUN_WESTMERE || SIMDJSON_CAN_ALWAYS_RUN_PPC64 +// if anything at all except fallback can always run, then disable fallback. +#define SIMDJSON_IMPLEMENTATION_FALLBACK 0 +#else +#define SIMDJSON_IMPLEMENTATION_FALLBACK 1 +#endif +#endif +#define SIMDJSON_CAN_ALWAYS_RUN_FALLBACK SIMDJSON_IMPLEMENTATION_FALLBACK + +// Determine the best builtin implementation +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION + +#if SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +#define SIMDJSON_BUILTIN_IMPLEMENTATION icelake +#elif SIMDJSON_CAN_ALWAYS_RUN_HASWELL +#define SIMDJSON_BUILTIN_IMPLEMENTATION haswell +#elif SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +#define SIMDJSON_BUILTIN_IMPLEMENTATION westmere +#elif SIMDJSON_CAN_ALWAYS_RUN_ARM64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION arm64 +#elif SIMDJSON_CAN_ALWAYS_RUN_PPC64 +#define SIMDJSON_BUILTIN_IMPLEMENTATION ppc64 +#elif SIMDJSON_CAN_ALWAYS_RUN_FALLBACK +#define SIMDJSON_BUILTIN_IMPLEMENTATION fallback +#else +#error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
+#endif + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION + +#define SIMDJSON_BUILTIN_IMPLEMENTATION_ID SIMDJSON_IMPLEMENTATION_ID_FOR(SIMDJSON_BUILTIN_IMPLEMENTATION) +#define SIMDJSON_BUILTIN_IMPLEMENTATION_IS(IMPL) SIMDJSON_BUILTIN_IMPLEMENTATION_ID == SIMDJSON_IMPLEMENTATION_ID_FOR(IMPL) + +#endif // SIMDJSON_IMPLEMENTATION_DETECTION_H +/* end file simdjson/implementation_detection.h */ + +namespace simdjson { +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) + namespace arm64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) + namespace fallback {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) + namespace haswell {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) + namespace icelake {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) + namespace ppc64 {} +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) + namespace westmere {} +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + + /** + * Represents the best statically linked simdjson implementation that can be used by the compiling + * program. + * + * Detects what options the program is compiled against, and picks the minimum implementation that + * will work on any computer that can run the program. For example, if you compile with g++ + * -march=westmere, it will pick the westmere implementation. The haswell implementation will + * still be available, and can be selected at runtime, but the builtin implementation (and any + * code that uses it) will use westmere. + */ + namespace builtin = SIMDJSON_BUILTIN_IMPLEMENTATION; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_BASE_H +/* end file simdjson/builtin/base.h */ +/* including simdjson/builtin/implementation.h: #include "simdjson/builtin/implementation.h" */ +/* begin file simdjson/builtin/implementation.h */ +#ifndef SIMDJSON_BUILTIN_IMPLEMENTATION_H +#define SIMDJSON_BUILTIN_IMPLEMENTATION_H + +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/dependencies.h: #include "simdjson/generic/dependencies.h" */ +/* begin file simdjson/generic/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_DEPENDENCIES_H +#define SIMDJSON_GENERIC_DEPENDENCIES_H + +// Internal headers needed for generics. +// All includes referencing simdjson headers *not* under simdjson/generic must be here! +// Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/base.h" */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/implementation_detection.h" */ +/* including simdjson/internal/instruction_set.h: #include "simdjson/internal/instruction_set.h" */ +/* begin file simdjson/internal/instruction_set.h */ +/* From +https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h +Highly modified. 
+ +Copyright (c) 2016- Facebook, Inc (Adam Paszke) +Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +Copyright (c) 2011-2013 NYU (Clement Farabet) +Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, +Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute +(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, +Samy Bengio, Johnny Mariethoz) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories +America and IDIAP Research Institute nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef SIMDJSON_INTERNAL_INSTRUCTION_SET_H +#define SIMDJSON_INTERNAL_INSTRUCTION_SET_H + +namespace simdjson { +namespace internal { + +enum instruction_set { + DEFAULT = 0x0, + NEON = 0x1, + AVX2 = 0x4, + SSE42 = 0x8, + PCLMULQDQ = 0x10, + BMI1 = 0x20, + BMI2 = 0x40, + ALTIVEC = 0x80, + AVX512F = 0x100, + AVX512DQ = 0x200, + AVX512IFMA = 0x400, + AVX512PF = 0x800, + AVX512ER = 0x1000, + AVX512CD = 0x2000, + AVX512BW = 0x4000, + AVX512VL = 0x8000, + AVX512VBMI2 = 0x10000 +}; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_INSTRUCTION_SET_H +/* end file simdjson/internal/instruction_set.h */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ +/* including simdjson/internal/jsoncharutils_tables.h: #include "simdjson/internal/jsoncharutils_tables.h" */ +/* begin file simdjson/internal/jsoncharutils_tables.h */ +#ifndef SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +#define SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#ifdef JSON_TEST_STRINGS +void found_string(const uint8_t *buf, const uint8_t *parsed_begin, + const uint8_t *parsed_end); +void found_bad_string(const uint8_t *buf); +#endif + +namespace simdjson { +namespace internal { +// structural chars here are +// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c (and NULL) +// we are also interested in the four whitespace characters +// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d + +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace_negated[256]; +extern SIMDJSON_DLLIMPORTEXPORT const bool structural_or_whitespace[256]; +extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_JSONCHARUTILS_TABLES_H +/* end file simdjson/internal/jsoncharutils_tables.h */ +/* including simdjson/internal/numberparsing_tables.h: #include "simdjson/internal/numberparsing_tables.h" */ +/* begin file simdjson/internal/numberparsing_tables.h */ +#ifndef SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +#define SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +namespace simdjson { +namespace internal { +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. + ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +constexpr int smallest_power = -342; +constexpr int largest_power = 308; + +/** + * Represents a 128-bit value. + * low: least significant 64 bits. + * high: most significant 64 bits. + */ +struct value128 { + uint64_t low; + uint64_t high; +}; + + +// Precomputed powers of ten from 10^0 to 10^22. These +// can be represented exactly using the double type. +extern SIMDJSON_DLLIMPORTEXPORT const double power_of_ten[]; + + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. 
Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + + +// The truncated powers of five from 5^-342 all the way to 5^308 +// The mantissa is truncated to 128 bits, and +// never rounded up. Uses about 10KB. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t power_of_five_128[]; +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_NUMBERPARSING_TABLES_H +/* end file simdjson/internal/numberparsing_tables.h */ +/* including simdjson/internal/simdprune_tables.h: #include "simdjson/internal/simdprune_tables.h" */ +/* begin file simdjson/internal/simdprune_tables.h */ +#ifndef SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +#define SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H + +/* skipped duplicate #include "simdjson/base.h" */ + +#include + +namespace simdjson { // table modified and copied from +namespace internal { // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable + +extern SIMDJSON_DLLIMPORTEXPORT const unsigned char BitsSetTable256mul2[256]; + +extern SIMDJSON_DLLIMPORTEXPORT const uint8_t pshufb_combine_table[272]; + +// 256 * 8 bytes = 2kB, easily fits in cache. +extern SIMDJSON_DLLIMPORTEXPORT const uint64_t thintable_epi8[256]; + +} // namespace internal +} // namespace simdjson + +#endif // SIMDJSON_INTERNAL_SIMDPRUNE_TABLES_H +/* end file simdjson/internal/simdprune_tables.h */ + +#endif // SIMDJSON_GENERIC_DEPENDENCIES_H +/* end file simdjson/generic/dependencies.h */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/implementation.h: #include "simdjson/arm64/implementation.h" */ +/* begin file simdjson/arm64/implementation.h */ +#ifndef SIMDJSON_ARM64_IMPLEMENTATION_H +#define SIMDJSON_ARM64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("arm64", "ARM NEON", internal::instruction_set::NEON) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_IMPLEMENTATION_H +/* end file simdjson/arm64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/implementation.h: #include "simdjson/fallback/implementation.h" */ +/* begin file simdjson/fallback/implementation.h */ +#ifndef SIMDJSON_FALLBACK_IMPLEMENTATION_H +#define SIMDJSON_FALLBACK_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "fallback", + "Generic fallback implementation", + 0 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_IMPLEMENTATION_H +/* end file simdjson/fallback/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/implementation.h: #include "simdjson/haswell/implementation.h" */ +/* begin file simdjson/haswell/implementation.h */ +#ifndef SIMDJSON_HASWELL_IMPLEMENTATION_H +#define SIMDJSON_HASWELL_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +namespace haswell { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "haswell", + "Intel/AMD AVX2", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_IMPLEMENTATION_H +/* end file simdjson/haswell/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/implementation.h: #include "simdjson/icelake/implementation.h" */ +/* begin file simdjson/icelake/implementation.h */ +#ifndef SIMDJSON_ICELAKE_IMPLEMENTATION_H +#define SIMDJSON_ICELAKE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +namespace icelake { + +/** + * @private + */ +class 
implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation( + "icelake", + "Intel/AMD AVX512", + internal::instruction_set::AVX2 | internal::instruction_set::PCLMULQDQ | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512F | internal::instruction_set::AVX512DQ | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 + ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_IMPLEMENTATION_H +/* end file simdjson/icelake/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64) +/* including simdjson/ppc64/implementation.h: #include "simdjson/ppc64/implementation.h" */ +/* begin file simdjson/ppc64/implementation.h */ +#ifndef SIMDJSON_PPC64_IMPLEMENTATION_H +#define SIMDJSON_PPC64_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +/** + * Implementation for ALTIVEC (PPC64). 
+ */ +namespace ppc64 { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() + : simdjson::implementation("ppc64", "PPC64 ALTIVEC", + internal::instruction_set::ALTIVEC) {} + + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, size_t max_length, + std::unique_ptr &dst) + const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, + uint8_t *dst, + size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, + size_t len) const noexcept final; +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_IMPLEMENTATION_H +/* end file simdjson/ppc64/implementation.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/implementation.h: #include "simdjson/westmere/implementation.h" */ +/* begin file simdjson/westmere/implementation.h */ +#ifndef SIMDJSON_WESTMERE_IMPLEMENTATION_H +#define SIMDJSON_WESTMERE_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/instruction_set.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +namespace westmere { + +/** + * @private + */ +class implementation final : public simdjson::implementation { +public: + simdjson_inline implementation() : simdjson::implementation("westmere", "Intel/AMD SSE4.2", internal::instruction_set::SSE42 | internal::instruction_set::PCLMULQDQ) {} + simdjson_warn_unused error_code create_dom_parser_implementation( + size_t capacity, + size_t max_length, + std::unique_ptr& dst + ) const noexcept final; + simdjson_warn_unused error_code minify(const uint8_t *buf, size_t len, uint8_t *dst, size_t &dst_len) const noexcept final; + simdjson_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final; +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/implementation.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { + /** + * Function which returns a pointer to an implementation matching the "builtin" implementation. + * The builtin implementation is the best statically linked simdjson implementation that can be used by the compiling + * program. If you compile with g++ -march=haswell, this will return the haswell implementation. + * It is handy to be able to check what builtin was used: builtin_implementation()->name(). 
+ */ + const implementation * builtin_implementation(); +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_IMPLEMENTATION_H +/* end file simdjson/builtin/implementation.h */ + +/* skipped duplicate #include "simdjson/generic/dependencies.h" */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64.h: #include "simdjson/arm64.h" */ +/* begin file simdjson/arm64.h */ +#ifndef SIMDJSON_ARM64_H +#define SIMDJSON_ARM64_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. 
+ // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if _M_ARM64 +// __umulh requires intrin.h +#include +#endif // _M_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. + * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. + */ +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so end ups generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so end ups generating worse code. 
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
+ simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return 
vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t 
v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} 
+ simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
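// --- Worked example (added commentary; not part of the simdjson sources) ---
// Two SWAR tricks in simd8x64 above deserve spelling out:
//
// compress(): vcnt_u8(vcreate_u8(~mask)) puts, in byte k, the number of *kept*
// elements (zero bits of `mask`) among input bytes 8k..8k+7. Multiplying that
// 64-bit value by 0x0101010101010101 turns per-byte counts into prefix sums:
//   counts {3, 5, 0, 8, 2, 1, 7, 4}  ->  product bytes {3, 8, 8, 16, 18, 19, 26, 30}
// which is how compress_halves() gets the starting output index for each
// successive 8-byte half, and why (offsets >> 56) is the total number written.
//
// to_bitmask(): AND-ing each 0x00/0xFF lane with {0x01,0x02,...,0x80,...} leaves
// bit (i mod 8) set in lane i when the comparison held; three rounds of
// vpaddq_u8 then fold every group of eight lanes into one byte, producing the
// same 64-bit mask an x86 movemask would give for the whole 64-byte block.
// ---------------------------------------------------------------------------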
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/amalgamated.h for arm64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for arm64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for arm64 */ +/* including simdjson/generic/jsoncharutils.h for arm64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for arm64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static 
inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for arm64 */ +/* including simdjson/generic/atomparsing.h for arm64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace arm64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for arm64 */ +/* including simdjson/generic/dom_parser_implementation.h for arm64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for arm64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// expectation: sizeof(open_container) = 64/8. 
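// --- Worked example (added commentary; not part of the simdjson sources) ---
// The atom checks above are branch-free. For the padded input "true," :
//   str4ncmp(src, "true") == 0                      // the two 32-bit words match
//   is_not_structural_or_whitespace(src[4]) == 0    // ',' is structural
//   (0 | 0) == 0, so is_valid_true_atom(src) returns true.
// For "truex", the trailing 'x' makes the second term non-zero, so the OR is
// non-zero and the atom is rejected, still without a branch.
// The compile-time-constant claim can be checked by hand on a little-endian
// target: "fals" is the bytes 0x66 0x61 0x6C 0x73, read as a uint32_t that is
// 0x736C6166 == 1936482662, the constant quoted above.
// ---------------------------------------------------------------------------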
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +namespace arm64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for arm64 */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for arm64 */ +/* including simdjson/generic/numberparsing.h for arm64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for arm64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
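// --- Worked example (added commentary; not part of the simdjson sources) ---
// to_double() above only assembles the IEEE-754 binary64 bit pattern
// (sign | biased exponent | fraction) and memcpy's it into a double:
//   to_double(0,          1023, false)  ->  bits 0x3FF0000000000000  ->  1.0
//   to_double(1ULL << 51, 1023, false)  ->  bits 0x3FF8000000000000  ->  1.5
// The caller guarantees the stated preconditions (real_exponent in [0, 2046],
// mantissa below 1 << 53); bit 52 of the mantissa is cleared by the function.
// ---------------------------------------------------------------------------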
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
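// --- Worked example (added commentary; not part of the simdjson sources) ---
// The fast path above (Clinger, 1990): when i < 2^53 and |power| <= 22, both i
// and 10^|power| are exactly representable, so one IEEE-754 multiply or divide
// is already correctly rounded. For instance:
//   text "3.1416" ->  i = 31416, power = -4
//     d = double(31416) / simdjson::internal::power_of_ten[4]   // 31416.0 / 10000.0
//   text "25e3"   ->  i = 25, power = 3
//     d = double(25) * simdjson::internal::power_of_ten[3]      // == 25000.0
// The remainder of compute_float_64 only handles inputs that miss this window.
// ---------------------------------------------------------------------------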
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
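// --- Worked example (added commentary; not part of the simdjson sources) ---
// Referring back to is_made_of_eight_digits_fast() above: for an ASCII digit
// (0x30..0x39) the high nibble is 3 and stays 3 after adding 0x06, so per byte
//   (b & 0xF0) | (((b + 0x06) & 0xF0) >> 4) == 0x33   iff   b is '0'..'9'
// e.g. '7' = 0x37 -> 0x30 | 0x03 = 0x33, but ':' = 0x3A -> 0x30 | 0x04 = 0x34
// and 'a' = 0x61 -> 0x60 | 0x06 = 0x66. Applying this to all eight bytes at
// once with the 64-bit constants yields 0x3333333333333333 exactly when every
// byte is a digit.
// ---------------------------------------------------------------------------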
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
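// --- Illustrative sketch (added commentary; not part of the simdjson sources;
//     the buffer/length/writer names below are made up for the example) ---
// The padding requirement described above matters when the whole document is a
// bare number such as "123": parse_number() peeks at the byte just past the
// digits, so the caller first copies the input into a padded buffer with a
// whitespace terminator, roughly:
//
//   std::unique_ptr<uint8_t[]> tmp(new uint8_t[len + SIMDJSON_PADDING]);
//   std::memcpy(tmp.get(), buf, len);
//   std::memset(tmp.get() + len, ' ', SIMDJSON_PADDING);  // space-terminate + pad
//   error_code err = parse_number(tmp.get(), writer);     // writer: some tape writer
// ---------------------------------------------------------------------------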
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
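// --- Worked example (added commentary; not part of the simdjson sources) ---
// The table contract described above, as used by the parse_unsigned() overloads
// below: once the digits are consumed, one lookup on the terminating byte
// classifies the whole token.
//   "123,"   terminator ','  ->  SUCCESS          (structural character)
//   "123 "   terminator ' '  ->  SUCCESS          (whitespace)
//   "123.5"  terminator '.'  ->  INCORRECT_TYPE   (a number, just not an integer)
//   "123x"   terminator 'x'  ->  NUMBER_ERROR     (not a valid JSON number)
// This single lookup replaces the branchy is_structural_or_whitespace() test
// shown in the commented-out alternative inside those helpers.
// ---------------------------------------------------------------------------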
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for arm64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for arm64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base<T>::value() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::take_value() && noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::operator T&&() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+#endif // SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline const T& implementation_simdjson_result_base<T>::value_unsafe() const& noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T& implementation_simdjson_result_base<T>::value_unsafe() & noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::value_unsafe() && noexcept {
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value, error_code error) noexcept
+    : first{std::forward<T>(value)}, second{error} {}
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(error_code error) noexcept
+    : implementation_simdjson_result_base(T{}, error) {}
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value) noexcept
+    : implementation_simdjson_result_base(std::forward<T>(value), SUCCESS) {}
+
+} // namespace arm64
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H
+/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for arm64 */
+/* end file simdjson/generic/amalgamated.h for arm64 */
+/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */
+/* begin file simdjson/arm64/end.h */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT
+/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */
+#undef SIMDJSON_IMPLEMENTATION
+/* end file simdjson/arm64/end.h */
+
+#endif // SIMDJSON_ARM64_H
+/* end file simdjson/arm64.h */
+#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback)
+/* including simdjson/fallback.h: #include "simdjson/fallback.h" */
+/* begin file simdjson/fallback.h */
+#ifndef SIMDJSON_FALLBACK_H
+#define SIMDJSON_FALLBACK_H
+
+/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */
+/* begin file simdjson/fallback/begin.h */
+/* defining SIMDJSON_IMPLEMENTATION to "fallback" */
+#define SIMDJSON_IMPLEMENTATION fallback
+/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */
+/* begin file simdjson/fallback/base.h */
+#ifndef SIMDJSON_FALLBACK_BASE_H
+#define SIMDJSON_FALLBACK_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+/**
+ * Fallback implementation (runs on any machine).
+ */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// _MSC_VER +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H +/* end file simdjson/fallback/bitmanipulation.h */ +/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */ +/* begin file simdjson/fallback/stringparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +// Holds backslashes and quotes locations. +struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return c == '"'; } + simdjson_inline bool has_backslash() { return c == '\\'; } + simdjson_inline int quote_index() { return c == '"' ? 0 : 1; } + simdjson_inline int backslash_index() { return c == '\\' ? 
0 : 1; } + + uint8_t c; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // store to dest unconditionally - we can overwrite the bits we don't like later + dst[0] = src[0]; + return { src[0] }; +} + +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H +/* end file simdjson/fallback/stringparsing_defs.h */ +/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */ +/* begin file simdjson/fallback/numberparsing_defs.h */ +#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#ifdef JSON_TEST_NUMBERS // for unit testing +void found_invalid_number(const uint8_t *buf); +void found_integer(int64_t result, const uint8_t *buf); +void found_unsigned_integer(uint64_t result, const uint8_t *buf); +void found_float(double result, const uint8_t *buf); +#endif + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) { + uint64_t val; + memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + return parse_eight_digits_unrolled(reinterpret_cast(chars)); +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // 
SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/amalgamated.h for fallback: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for fallback: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." 
*/ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for fallback */ +/* including simdjson/generic/jsoncharutils.h for fallback: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for fallback */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... 
+ //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for fallback */ +/* including simdjson/generic/atomparsing.h for fallback: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. +// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. 
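+// As a concrete illustration (assuming a little-endian target): string_to_uint32("true")
+// reads the four bytes 't','r','u','e' and yields the constant 0x65757274, so
+// str4ncmp(src, "true") below is zero exactly when the first four bytes of src spell
+// "true"; any mismatching byte makes the XOR non-zero.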
+simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for fallback */ +/* including simdjson/generic/dom_parser_implementation.h for fallback: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for fallback */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// expectation: sizeof(open_container) = 64/8. 
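+// (That is, two 32-bit fields packed into a single 8-byte struct; the static_assert
+// below checks this expectation.)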
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +namespace fallback { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for fallback */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for fallback */ +/* including simdjson/generic/numberparsing.h for fallback: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for fallback */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace fallback { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
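+// (For example, the JSON literal 3.1415 reaches this routine as i = 31415 with
+// power = -4, i.e. the request is to compute 31415 * 10^-4.)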
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
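+  // (Quick sanity check of the approximation below: for power = 10,
+  // ((152170 + 65536) * 10) >> 16 = 2177060 >> 16 = 33, which matches
+  // floor(log(5**10)/log(2)) + 10 = 23 + 10 = 33.)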
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
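// The shift-based estimate above, (((152170 + 65536) * power) >> 16), is claimed to
// equal floor(log(5^power)/log(2)) + power over the range of interest. A small
// self-contained check of that claim (illustrative only; like the kernel itself, it
// assumes an arithmetic right shift for negative operands):
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  for (int64_t power = -399; power < 350; power++) {
    int64_t estimate = ((152170 + 65536) * power) >> 16;
    int64_t direct = int64_t(std::floor(std::log2(5.0) * double(power))) + power;
    if (estimate != direct) { std::printf("mismatch at power=%lld\n", (long long)power); return 1; }
  }
  std::printf("exponent estimate agrees on (-400, 350)\n");
  return 0;
}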
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
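// The truncated multiplication described above needs both 64-bit halves of a
// 64x64 -> 128-bit product. A portable sketch of such a helper for compilers that
// provide unsigned __int128 (GCC/Clang); the local struct mirrors the {low, high}
// pair used above but is only an illustration, not simdjson::internal::value128:
#include <cstdint>

struct value128_sketch { uint64_t low; uint64_t high; };

static inline value128_sketch full_multiplication_sketch(uint64_t a, uint64_t b) {
  unsigned __int128 r = (unsigned __int128)a * b;
  value128_sketch answer;
  answer.low = uint64_t(r);        // least significant 64 bits of the product
  answer.high = uint64_t(r >> 64); // most significant 64 bits of the product
  return answer;
}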
+  // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
+  // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
+  // 2^{53} x 5^{-q} < 2^{64}.
+  // Hence we have 5^{-q} < 2^{11} or q >= -4.
+  //
+  // We require lower <= 1 and not lower == 0 because we could not prove that
+  // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
+  if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
+    if((mantissa << (upperbit + 64 - 53 - 2)) == upper) {
+      mantissa &= ~1;             // flip it so that we do not round up
+    }
+  }
+
+  mantissa += mantissa & 1;
+  mantissa >>= 1;
+
+  // Here we have mantissa < (1<<53), unless there was an overflow
+  if (mantissa >= (1ULL << 53)) {
+    //////////
+    // This will happen when parsing values such as 7.2057594037927933e+16
+    ////////
+    mantissa = (1ULL << 52);
+    real_exponent++;
+  }
+  mantissa &= ~(1ULL << 52);
+  // we have to check that real_exponent is in range, otherwise we bail out
+  if (simdjson_unlikely(real_exponent > 2046)) {
+    // We have an infinite value!!! We could actually throw an error here if we could.
+    return false;
+  }
+  d = to_double(mantissa, real_exponent, negative);
+  return true;
+}
+
+// We call a fallback floating-point parser that might be slow. Note
+// it will accept JSON numbers, but the JSON spec. is more restrictive so
+// before you call parse_float_fallback, you need to have validated the input
+// string with the JSON grammar.
+// It will return an error (false) if the parsed number is infinite.
+// The string parsing itself always succeeds. We know that there is at least
+// one digit.
+static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return !std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
+  *outDouble = simdjson::internal::from_chars(reinterpret_cast<const char *>(ptr), reinterpret_cast<const char *>(end_ptr));
+  // We do not accept infinite values.
+
+  // Detecting finite values in a portable manner is ridiculously hard, ideally
+  // we would want to do:
+  // return !std::isfinite(*outDouble);
+  // but that mysteriously fails under legacy/old libc++ libraries, see
+  // https://github.com/simdjson/simdjson/issues/1286
+  //
+  // Therefore, fall back to this solution (the extra parens are there
+  // to handle that max may be a macro on windows).
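// The finiteness test used by parse_float_fallback can be exercised on its own with
// strtod standing in for simdjson::internal::from_chars (illustrative only):
#include <cstdio>
#include <cstdlib>
#include <limits>

static bool fits_in_double_sketch(double v) {
  // same shape as the check above: reject anything beyond the largest finite double
  return !(v > (std::numeric_limits<double>::max)() || v < std::numeric_limits<double>::lowest());
}

int main() {
  std::printf("%d\n", fits_in_double_sketch(std::strtod("1e308", nullptr)));  // 1: finite
  std::printf("%d\n", fits_in_double_sketch(std::strtod("1e400", nullptr)));  // 0: overflows to +infinity
  std::printf("%d\n", fits_in_double_sketch(std::strtod("-1e400", nullptr))); // 0: overflows to -infinity
  return 0;
}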
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
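// is_made_of_eight_digits_fast above validates eight ASCII digits in one 64-bit
// operation. A standalone version with a scalar cross-check (illustrative; it reads
// from small local buffers rather than padded JSON input):
#include <cstdint>
#include <cstdio>
#include <cstring>

static bool eight_digits_swar(const char *chars) {
  uint64_t val;
  std::memcpy(&val, chars, 8);
  return (((val & 0xF0F0F0F0F0F0F0F0) |
           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
          0x3333333333333333);
}

static bool eight_digits_scalar(const char *chars) {
  for (int k = 0; k < 8; k++) {
    if (chars[k] < '0' || chars[k] > '9') { return false; }
  }
  return true;
}

int main() {
  const char *samples[] = {"12345678", "1234567e", "00000000", "1234 678"};
  for (const char *s : samples) {
    std::printf("%-8s -> swar=%d scalar=%d\n", s, eight_digits_swar(s), eight_digits_scalar(s));
  }
  return 0;
}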
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
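// parse_number below is parameterized on a writer type W. Judging only from how W is
// used in this file (append_s64, append_u64, append_double, skip_double), a hypothetical
// minimal writer for experimentation could look like the sketch below. It is not the
// library's tape writer; it merely records what parse_number would have emitted.
#include <cstdint>
#include <vector>

struct debug_number_writer_sketch {
  std::vector<int64_t> signed_values;
  std::vector<uint64_t> unsigned_values;
  std::vector<double> double_values;
  void append_s64(int64_t v) { signed_values.push_back(v); }
  void append_u64(uint64_t v) { unsigned_values.push_back(v); }
  void append_double(double v) { double_values.push_back(v); }
  // The real writer advances past a slot that the slow path already filled through a
  // copy of the writer; for this recorder there is nothing to do.
  void skip_double() {}
};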
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
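// The integer_string_finisher table above folds the three-way check
//   structural or whitespace -> SUCCESS, '.'/'e'/'E' -> INCORRECT_TYPE, anything else -> NUMBER_ERROR
// into a single indexed load. The same technique in a small standalone form (the enum
// and table here are local illustrations, not the library's error codes):
#include <cstdint>
#include <cstdio>

enum finisher_sketch : uint8_t { OK = 0, WRONG_TYPE = 1, BAD_NUMBER = 2 };

struct finisher_table_sketch {
  uint8_t entry[256];
  constexpr finisher_table_sketch() : entry() {
    for (int c = 0; c < 256; c++) { entry[c] = BAD_NUMBER; }
    const char ok_chars[] = " \t\n\r,:[]{}"; // JSON whitespace and structural characters
    for (int k = 0; ok_chars[k] != '\0'; k++) { entry[(unsigned char)ok_chars[k]] = OK; }
    entry[(unsigned char)'.'] = WRONG_TYPE;
    entry[(unsigned char)'e'] = WRONG_TYPE;
    entry[(unsigned char)'E'] = WRONG_TYPE;
  }
};

static constexpr finisher_table_sketch finisher_lookup{};

int main() {
  std::printf("',' -> %d, 'e' -> %d, 'x' -> %d\n",
              finisher_lookup.entry[(unsigned char)','],
              finisher_lookup.entry[(unsigned char)'e'],
              finisher_lookup.entry[(unsigned char)'x']);
  return 0;
}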
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
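// The bound quoted in these overflow comments can be reproduced directly: the largest
// 20-digit value starting with '1' is 19,999,999,999,999,999,999 = 2^64 +
// 1,553,255,926,290,448,383, so when the i = 10 * i + digit accumulation wraps, the
// result stays below 1,553,255,926,290,448,384 and therefore below any genuine
// 20-digit value. A standalone check (illustrative only):
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t wrapped = 0;
  for (const char *p = "19999999999999999999"; *p != '\0'; p++) {
    wrapped = 10 * wrapped + uint64_t(*p - '0'); // deliberately allowed to wrap modulo 2^64
  }
  std::printf("wrapped value: %llu\n", (unsigned long long)wrapped); // 1553255926290448383
  std::printf("below the 1,553,255,926,290,448,384 bound: %d\n", wrapped < 1553255926290448384ULL);
  return 0;
}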
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
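// The classification above separates signed from unsigned integers purely by digit
// count plus a lexicographic comparison against "9223372036854775808" (2^63), which
// works because both strings have exactly 19 digits. A standalone restatement of the
// rule for non-negative inputs (illustrative only):
#include <cstddef>
#include <cstdio>
#include <cstring>

static const char *classify_nonnegative_integer_sketch(const char *digits, std::size_t digit_count) {
  if (digit_count < 19) { return "signed"; }
  if (digit_count >= 20) { return "unsigned"; }
  // exactly 19 digits: anything at or above 2^63 no longer fits in int64_t
  return (std::memcmp(digits, "9223372036854775808", 19) >= 0) ? "unsigned" : "signed";
}

int main() {
  std::printf("%s\n", classify_nonnegative_integer_sketch("9223372036854775807", 19)); // signed (INT64_MAX)
  std::printf("%s\n", classify_nonnegative_integer_sketch("9223372036854775808", 19)); // unsigned (2^63)
  std::printf("%s\n", classify_nonnegative_integer_sketch("123", 3));                  // signed
  return 0;
}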
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for fallback */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for fallback: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for fallback */ +/* end file simdjson/generic/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_H +/* end file simdjson/fallback.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell.h: #include "simdjson/haswell.h" */ +/* begin file simdjson/haswell.h */ +#ifndef SIMDJSON_HASWELL_H +#define SIMDJSON_HASWELL_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). 
+ */
+namespace haswell {
+
+class implementation;
+
+namespace {
+namespace simd {
+template <typename T> struct simd8;
+template <typename T> struct simd8x64;
+} // namespace simd
+} // unnamed namespace
+
+} // namespace haswell
+} // namespace simdjson
+
+#endif // SIMDJSON_HASWELL_BASE_H
+/* end file simdjson/haswell/base.h */
+/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */
+/* begin file simdjson/haswell/intrinsics.h */
+#ifndef SIMDJSON_HASWELL_INTRINSICS_H
+#define SIMDJSON_HASWELL_INTRINSICS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#if SIMDJSON_VISUAL_STUDIO
+// under clang within visual studio, this will include <x86intrin.h>
+#include <intrin.h> // visual studio or clang
+#else
+#include <x86intrin.h> // elsewhere
+#endif // SIMDJSON_VISUAL_STUDIO
+
+#if SIMDJSON_CLANG_VISUAL_STUDIO
+/**
+ * You are not supposed, normally, to include these
+ * headers directly. Instead you should either include intrin.h
+ * or x86intrin.h. However, when compiling with clang
+ * under Windows (i.e., when _MSC_VER is set), these headers
+ * only get included *if* the corresponding features are detected
+ * from macros:
+ * e.g., if __AVX2__ is set... in turn, we normally set these
+ * macros by compiling against the corresponding architecture
+ * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
+ * software with these advanced instructions. In simdjson, we
+ * want to compile the whole program for a generic target,
+ * and only target our specific kernels. As a workaround,
+ * we directly include the needed headers. These headers would
+ * normally guard against such usage, but we carefully included
+ * <intrin.h> (or <x86intrin.h>) before, so the headers
+ * are fooled.
+ */
+#include <bmiintrin.h>   // for _blsr_u64
+#include <lzcntintrin.h> // for __lzcnt64
+#include <immintrin.h>   // for most things (AVX2, AVX512, _popcnt64)
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#include <avxintrin.h>
+#include <avx2intrin.h>
+#include <wmmintrin.h>   // for _mm_clmulepi64_si128
+// unfortunately, we may not get _blsr_u64, but, thankfully, clang
+// has it as a macro.
+#ifndef _blsr_u64
+// we roll our own
+#define _blsr_u64(n) ((n - 1) & n)
+#endif // _blsr_u64
+#endif // SIMDJSON_CLANG_VISUAL_STUDIO
+
+static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel.");
+
+#endif // SIMDJSON_HASWELL_INTRINSICS_H
+/* end file simdjson/haswell/intrinsics.h */
+
+#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL
+SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt")
+#endif
+
+/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */
+/* begin file simdjson/haswell/bitmanipulation.h */
+#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H
+#define SIMDJSON_HASWELL_BITMANIPULATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace haswell {
+namespace {
+
+// We sometimes call trailing_zero on inputs that are zero,
+// but the algorithms do not end up using the returned value.
+// Sadly, sanitizers are not smart enough to figure it out.
+SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. 
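  // editorial aside (illustrative, not part of the simdjson sources): a portable scalar
  // model of what the carry-less multiply below computes. Multiplying the mask by an
  // all-ones word in GF(2) XORs together every left-shifted copy of the mask, so bit k
  // of the product is the parity of mask bits 0..k -- exactly the cumulative XOR
  // described above. A plain C++ sketch of the same result:
  //   uint64_t prefix = 0, parity = 0;
  //   for (int k = 0; k < 64; k++) { parity ^= (bitmask >> k) & 1; prefix |= parity << k; }
  //   // now prefix == prefix_xor(bitmask); e.g. prefix_xor(0b00100100) == 0b00011100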
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. + const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include 
"simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, 
v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. 
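    // editorial aside (illustrative, not part of the simdjson sources): the two unaligned
    // stores below overlap on purpose. The low 16 input bytes keep 16 - count_ones(mask & 0xFFFF)
    // bytes, so the high lane is written starting at that offset; for example, with 5 ones in
    // the low 16 mask bits the first store writes 16 bytes of which only 11 are meaningful,
    // and the second store begins at output + 11, overwriting the 5 junk bytes.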
+ // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. + __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t 
v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
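// editorial sketch (illustrative, not part of the simdjson sources): the finder below records
// one bit per input byte for backslashes and one for quotes. has_quote_first() relies on
// (bs_bits - 1) setting exactly the bit positions strictly below the lowest set bit of bs_bits
// (and all positions when bs_bits is zero), so ANDing it with quote_bits is non-zero precisely
// when some quote appears before the first backslash. A scalar model, using a hypothetical
// helper name:
static inline bool editorial_quote_before_first_backslash(uint32_t bs_bits, uint32_t quote_bits) {
  // (bs_bits - 1) keeps only positions below the first backslash (all ones if there is none)
  return ((bs_bits - 1) & quote_bits) != 0;
}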
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/amalgamated.h for haswell: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for haswell: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for haswell */ +/* including simdjson/generic/jsoncharutils.h for haswell: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for haswell */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly 
speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for haswell */ +/* including simdjson/generic/atomparsing.h for haswell: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
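// editorial aside (illustrative, not part of the simdjson sources): on a little-endian target
// the four bytes read from "false" are 'f','a','l','s' = 0x66,0x61,0x6C,0x73, which assemble
// into the 32-bit value 0x736C6166 = 1936482662 -- the constant cited just below. The same
// reasoning applies to the "true" and "null" atoms.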
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for haswell */ +/* including simdjson/generic/dom_parser_implementation.h for haswell: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for haswell */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// expectation: sizeof(open_container) = 64/8. 
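// editorial aside (illustrative, not part of the simdjson sources): the struct below packs the
// tape index and the element count of an open scope into two 32-bit fields, so each entry on
// the stage-2 stack costs exactly 8 bytes; the static_assert that follows enforces that
// expectation.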
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +namespace haswell { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for haswell */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for haswell */ +/* including simdjson/generic/numberparsing.h for haswell: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for haswell */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
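// editorial worked example (illustrative, not part of the simdjson sources): for to_double
// above, passing mantissa = 0, real_exponent = 1023 and negative = false packs the bits
// 0x3FF0000000000000, which is the IEEE-754 binary64 encoding of 1.0; the biased exponent
// occupies bits 52..62 and the sign occupies bit 63, exactly as the shifts above place them.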
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
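  // editorial worked example (illustrative, not part of the simdjson sources): for power = 10,
  // the fast expression described just below, ((152170 + 65536) * 10) >> 16, evaluates to 33,
  // which matches floor(log2(5^10)) + 10 = 23 + 10.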
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
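  // editorial aside (illustrative, not part of the simdjson sources): concretely, an overflowing
  // literal such as 1e400 is expected to come back from from_chars as +infinity, which fails the
  // range test below and is therefore reported as an error, matching the comment above.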
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
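+// An illustrative aside before the entry point below: compute_float_64 and
+// write_float ultimately hand a 52-bit fraction, an 11-bit biased exponent and
+// a sign to to_double. A helper of that kind typically just packs the three
+// IEEE-754 binary64 fields, roughly as sketched here (the name and exact shape
+// of this sketch are not part of the library's interface):
+simdjson_unused static inline double example_pack_binary64(uint64_t fraction, uint64_t biased_exponent, bool negative) {
+  // sign in bit 63, biased exponent in bits 62..52, fraction in bits 51..0
+  uint64_t bits = fraction | (biased_exponent << 52) | (uint64_t(negative) << 63);
+  double d;
+  std::memcpy(&d, &bits, sizeof(d)); // bit-level copy avoids aliasing issues
+  return d; // e.g. example_pack_binary64(0, 1023, false) == 1.0
+}
+// The parse_number entry point documented above follows.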
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
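+// (Illustrative note: the table below is indexed directly by the raw byte
+// value, so the INCORRECT_TYPE entries sit at the ASCII codes of the float
+// markers mentioned above.)
+static_assert(('.' == 46) && ('E' == 69) && ('e' == 101),
+  "the lookup table below assumes ASCII byte values");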
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
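+  // Illustrative only: the digit-count bounds used a few lines below (and the
+  // 19-digit bound in the signed variants further down) simply restate the
+  // decimal widths of the extreme 64-bit values.
+  static_assert(sizeof("18446744073709551615") - 1 == 20,
+    "UINT64_MAX has 20 decimal digits");
+  static_assert(sizeof("9223372036854775807") - 1 == 19,
+    "INT64_MAX has 19 decimal digits");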
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for haswell */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for haswell: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for haswell */ +/* end file simdjson/generic/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_H +/* end file simdjson/haswell.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake.h: #include "simdjson/icelake.h" */ +/* begin file simdjson/icelake.h */ +#ifndef SIMDJSON_ICELAKE_H +#define SIMDJSON_ICELAKE_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). 
+ */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include +#include +#include +#include +#include +#include +#include +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? + //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. 
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. + */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + 
(uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. + template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + 
v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t 
v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return 
simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
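+// For illustration only (this block is not used by the parser): given a
+// 64-byte chunk that starts with
+//     a  b  "  c  \  ...        (quote at byte 2, backslash at byte 4)
+// copy_and_find() below returns quote_bits with bit 2 set and bs_bits with
+// bit 4 set; has_quote_first() is then true and quote_index() == 2, i.e. the
+// string ends before any escape needs to be processed.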
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 64;
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; }
+  simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; }
+  simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); }
+  simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); }
+
+  uint64_t bs_bits;
+  uint64_t quote_bits;
+}; // struct backslash_and_quote
+
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // this can read up to 63 bytes beyond the buffer size, but we require
+  // SIMDJSON_PADDING of padding
+  static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes");
+  simd8<uint8_t> v(src);
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  v.store(dst);
+  return {
+      static_cast<uint64_t>(v == '\\'), // bs_bits
+      static_cast<uint64_t>(v == '"'),  // quote_bits
+  };
+}
+
+} // unnamed namespace
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H
+/* end file simdjson/icelake/stringparsing_defs.h */
+/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */
+/* begin file simdjson/icelake/numberparsing_defs.h */
+#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace icelake {
+namespace numberparsing {
+
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  // this actually computes *16* values so we are being wasteful.
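+  // Rough sketch of the reduction below, assuming chars points at "12345678":
+  //   bytes minus '0':                 [ 1  2  3  4  5  6  7  8  0 ... ]
+  //   _mm_maddubs_epi16 (*10, *1):     [ 12   34   56   78   0  0  0  0 ]
+  //   _mm_madd_epi16    (*100, *1):    [ 1234      5678      0     0    ]
+  //   _mm_packus_epi32 + _mm_madd_epi16 (*10000, *1):
+  //                                    [ 12345678  0  12345678  0 ]
+  // and _mm_cvtsi128_si32 returns the low lane, 12345678.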
+  const __m128i ascii0 = _mm_set1_epi8('0');
+  const __m128i mul_1_10 =
+      _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1);
+  const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1);
+  const __m128i mul_1_10000 =
+      _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1);
+  const __m128i input = _mm_sub_epi8(
+      _mm_loadu_si128(reinterpret_cast<const __m128i *>(chars)), ascii0);
+  const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10);
+  const __m128i t2 = _mm_madd_epi16(t1, mul_1_100);
+  const __m128i t3 = _mm_packus_epi32(t2, t2);
+  const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000);
+  return _mm_cvtsi128_si32(
+      t4); // only captures the sum of the first 8 digits, drop the rest
+}
+
+/** @private */
+simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  internal::value128 answer;
+#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+#ifdef _M_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emulate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // _M_ARM64
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace numberparsing
+} // namespace icelake
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H
+/* end file simdjson/icelake/numberparsing_defs.h */
+/* end file simdjson/icelake/begin.h */
+/* including simdjson/generic/amalgamated.h for icelake: #include "simdjson/generic/amalgamated.h" */
+/* begin file simdjson/generic/amalgamated.h for icelake */
+#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H)
+#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h!
+#endif
+
+/* including simdjson/generic/base.h for icelake: #include "simdjson/generic/base.h" */
+/* begin file simdjson/generic/base.h for icelake */
+#ifndef SIMDJSON_GENERIC_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */
+/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for icelake */ +/* including simdjson/generic/jsoncharutils.h for icelake: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for icelake */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly 
speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for icelake */ +/* including simdjson/generic/atomparsing.h for icelake: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for icelake */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for icelake */ +/* including simdjson/generic/dom_parser_implementation.h for icelake: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for icelake */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// expectation: sizeof(open_container) = 64/8. 
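+// (Two uint32_t fields packed into one 8-byte slot; the static_assert below
+// checks this. Roughly speaking, stage 2 pushes one open_container per
+// unmatched '{' or '[' it encounters and pops it when the matching '}' or ']'
+// arrives, patching the element count back onto the tape.)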
+struct open_container {
+  uint32_t tape_index; // where, on the tape, does the scope ([,{) begin
+  uint32_t count; // how many elements in the scope
+}; // struct open_container
+
+static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits");
+
+class dom_parser_implementation final : public internal::dom_parser_implementation {
+public:
+  /** Tape location of each open { or [ */
+  std::unique_ptr<open_container[]> open_containers{};
+  /** Whether each open container is a [ or { */
+  std::unique_ptr<bool[]> is_array{};
+  /** Buffer passed to stage 1 */
+  const uint8_t *buf{};
+  /** Length passed to stage 1 */
+  size_t len{0};
+  /** Document passed to stage 2 */
+  dom::document *doc{};
+
+  inline dom_parser_implementation() noexcept;
+  inline dom_parser_implementation(dom_parser_implementation &&other) noexcept;
+  inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept;
+  dom_parser_implementation(const dom_parser_implementation &) = delete;
+  dom_parser_implementation &operator=(const dom_parser_implementation &) = delete;
+
+  simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final;
+  simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final;
+  simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final;
+  simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final;
+  simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final;
+  inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final;
+  inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final;
+private:
+  simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity);
+
+};
+
+} // namespace icelake
+} // namespace simdjson
+
+namespace simdjson {
+namespace icelake {
+
+inline dom_parser_implementation::dom_parser_implementation() noexcept = default;
+inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default;
+inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default;
+
+// Leaving these here so they can be inlined if so desired
+inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept {
+  if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; }
+  // Stage 1 index output
+  size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7;
+  structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] );
+  if (!structural_indexes) { _capacity = 0; return MEMALLOC; }
+  structural_indexes[0] = 0;
+  n_structural_indexes = 0;
+
+  _capacity = capacity;
+  return SUCCESS;
+}
+
+inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept {
+  // Stage 2 stacks
+  open_containers.reset(new (std::nothrow) open_container[max_depth]);
+  is_array.reset(new (std::nothrow) bool[max_depth]);
+  if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; }
+
+  _max_depth = max_depth;
+  return SUCCESS;
+}
+
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H
+/* end file simdjson/generic/dom_parser_implementation.h for icelake */
+/* including 
simdjson/generic/implementation_simdjson_result_base.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+   */
+  simdjson_inline T&& take_value() && noexcept(false);
+
+  /**
+   * Cast to the value (will throw on error).
+   *
+   * @throw simdjson_error if there was an error.
+   */
+  simdjson_inline operator T&&() && noexcept(false);
+
+
+#endif // SIMDJSON_EXCEPTIONS
+
+  /**
+   * Get the result value. This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline const T& value_unsafe() const& noexcept;
+  /**
+   * Get the result value. This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline T& value_unsafe() & noexcept;
+  /**
+   * Take the result value (move it). This function is safe if and only if
+   * the error() method returns a value that evaluates to false.
+   */
+  simdjson_inline T&& value_unsafe() && noexcept;
+protected:
+  /** users should never directly access first and second. **/
+  T first{}; /** Users should never directly access 'first'. **/
+  error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/
+}; // struct implementation_simdjson_result_base
+
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H
+/* end file simdjson/generic/implementation_simdjson_result_base.h for icelake */
+/* including simdjson/generic/numberparsing.h for icelake: #include "simdjson/generic/numberparsing.h" */
+/* begin file simdjson/generic/numberparsing.h for icelake */
+#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include
+#include
+#include
+
+namespace simdjson {
+namespace icelake {
+namespace numberparsing {
+
+#ifdef JSON_TEST_NUMBERS
+#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
+#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
+#else
+#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
+#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
+#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE))
+#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
+#endif
+
+namespace {
+
+// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
+// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
+// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) will be zeroed.
+simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
+  double d;
+  mantissa &= ~(1ULL << 52);
+  mantissa |= real_exponent << 52;
+  mantissa |= ((static_cast<uint64_t>(negative)) << 63);
+  std::memcpy(&d, &mantissa, sizeof(d));
+  return d;
+}
+
+// Attempts to compute i * 10^(power) exactly; and if "negative" is
+// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
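+  // (Without the extra parentheses, std::numeric_limits<double>::max() can be
+  // expanded by the function-like max macro that <windows.h> defines when
+  // NOMINMAX is not set; writing (std::numeric_limits<double>::max)() blocks
+  // that expansion.)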
+  return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
+}
+
+// check quickly whether the next 8 chars are made of digits
+// at a glance, it looks better than Mula's
+// http://0x80.pl/articles/swar-digits-validate.html
+simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
+  uint64_t val;
+  // this can read up to 7 bytes beyond the buffer size, but we require
+  // SIMDJSON_PADDING of padding
+  static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
+  std::memcpy(&val, chars, 8);
+  // a branchy method might be faster:
+  // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
+  //  && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
+  //  0x3030303030303030);
+  return (((val & 0xF0F0F0F0F0F0F0F0) |
+           (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
+          0x3333333333333333);
+}
+
+template<typename I>
+SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
+simdjson_inline bool parse_digit(const uint8_t c, I &i) {
+  const uint8_t digit = static_cast<uint8_t>(c - '0');
+  if (digit > 9) {
+    return false;
+  }
+  // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
+  i = 10 * i + digit; // might overflow, we will handle the overflow later
+  return true;
+}
+
+simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
+  // we continue with the fiction that we have an integer. If the
+  // floating point number is representable as x * 10^z for some integer
+  // z that fits in 53 bits, then we will be able to convert the integer
+  // back into a float in a lossless manner.
+  const uint8_t *const first_after_period = p;
+
+#ifdef SIMDJSON_SWAR_NUMBER_PARSING
+#if SIMDJSON_SWAR_NUMBER_PARSING
+  // this helps if we have lots of decimals!
+  // this turns out to be frequent enough.
+  if (is_made_of_eight_digits_fast(p)) {
+    i = i * 100000000 + parse_eight_digits_unrolled(p);
+    p += 8;
+  }
+#endif // SIMDJSON_SWAR_NUMBER_PARSING
+#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING
+  // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
+  if (parse_digit(*p, i)) { ++p; }
+  while (parse_digit(*p, i)) { p++; }
+  exponent = first_after_period - p;
+  // Decimal without digits (123.) is illegal
+  if (exponent == 0) {
+    return INVALID_NUMBER(src);
+  }
+  return SUCCESS;
+}
+
+simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
+  // Exp Sign: -123.456e[-]78
+  bool neg_exp = ('-' == *p);
+  if (neg_exp || '+' == *p) { p++; } // Skip + as well
+
+  // Exponent: -123.456e-[78]
+  auto start_exp = p;
+  int64_t exp_number = 0;
+  while (parse_digit(*p, exp_number)) { ++p; }
+  // It is possible for parse_digit to overflow.
+  // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
+  // Thus we *must* check for possible overflow before we negate exp_number.
+
+  // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
+  // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
+  // not oblige and may, in fact, generate two distinct paths in any case. It might be
+  // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
+  // instructions for a simdjson_likely branch, an inconclusive gain.
+
+  // If there were no digits, it's an error. 
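+  // (For instance, "1e", "3E+" or "2e-" all leave p == start_exp and are
+  // rejected here.)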
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow.
+    // This is what forces the skip_double, as well.
+    error_code error = slow_float_parsing(src, writer);
+    writer.skip_double();
+    return error;
+  }
+  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
+  // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
+  // To future reader: we'd love if someone found a better way, or at least could explain this result!
+  if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
+    //
+    // Important: smallest_power is such that it leads to a zero value.
+    // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
+    // so something x 10^-343 goes to zero, but not so with something x 10^-342.
+    static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
+    //
+    if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
+      // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
+      WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer);
+      return SUCCESS;
+    } else { // (exponent > largest_power) and (i != 0)
+      // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
+      return INVALID_NUMBER(src);
+    }
+  }
+  double d;
+  if (!compute_float_64(exponent, i, negative, d)) {
+    // we are almost never going to get here.
+    if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); }
+  }
+  WRITE_DOUBLE(d, src, writer);
+  return SUCCESS;
+}
+
+// for performance analysis, it is sometimes useful to skip parsing
+#ifdef SIMDJSON_SKIPNUMBERPARSING
+
+template<typename W>
+simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
+  writer.append_s64(0);        // always write zero
+  return SUCCESS;              // always succeeds
+}
+
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; }
+simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; }
+simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; }
+#else
+
+// parse the number at src
+// define JSON_TEST_NUMBERS for unit testing
+//
+// It is assumed that the number is followed by a structural ({,},],[) character
+// or a white space character. If that is not the case (e.g., when the JSON
+// document is made of a single number), then it is necessary to copy the
+// content and append a space before calling this function.
+//
+// Our objective is accurate parsing (ULP of 0) at high speed.
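+// Editor's note (illustrative sketch, not part of upstream simdjson): the padding
+// requirement described above matters when the whole document is a bare number.
+// A caller would copy the text into a padded buffer and append a space so that the
+// terminating-character checks below never inspect an unrelated byte. Hypothetical
+// buffer and writer names:
+//
+//   std::vector<uint8_t> buf(SIMDJSON_PADDING + 5, ' '); // padded, space-filled buffer
+//   std::memcpy(buf.data(), "12.5", 4);                  // the bare number text
+//   // buf[4] is already a space and acts as the required terminating character
+//   auto err = parse_number(buf.data(), writer);         // writer supplied by the caller
+//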
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
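+// Editor's note (illustrative, not part of upstream simdjson): concretely, the table
+// below maps the byte that follows the digits to an outcome, for example:
+//   integer_string_finisher[','] == SUCCESS          (structural character ends the integer)
+//   integer_string_finisher[' '] == SUCCESS          (white space ends the integer)
+//   integer_string_finisher['.'] == INCORRECT_TYPE   (the value is really a float)
+//   integer_string_finisher['x'] == NUMBER_ERROR     (not a valid number at all)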
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
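+  // Editor's note (illustrative, not part of upstream simdjson): with the rules above,
+  //   "-1"                   -> number_type::signed_integer   (leading minus sign)
+  //   "9223372036854775808"  -> number_type::unsigned_integer (19 digits, equal to INT64_MAX + 1)
+  //   "1.25" or "1e3"        -> number_type::floating_point_number (falls through to the return below)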
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for icelake */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for icelake: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base<T>::value() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::take_value() && noexcept(false) {
+  if (error()) { throw simdjson_error(error()); }
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::operator T&&() && noexcept(false) {
+  return std::forward<implementation_simdjson_result_base<T>>(*this).take_value();
+}
+
+#endif // SIMDJSON_EXCEPTIONS
+
+template<typename T>
+simdjson_inline const T& implementation_simdjson_result_base<T>::value_unsafe() const& noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T& implementation_simdjson_result_base<T>::value_unsafe() & noexcept {
+  return this->first;
+}
+
+template<typename T>
+simdjson_inline T&& implementation_simdjson_result_base<T>::value_unsafe() && noexcept {
+  return std::forward<T>(this->first);
+}
+
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value, error_code error) noexcept
+    : first{std::forward<T>(value)}, second{error} {}
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(error_code error) noexcept
+    : implementation_simdjson_result_base(T{}, error) {}
+template<typename T>
+simdjson_inline implementation_simdjson_result_base<T>::implementation_simdjson_result_base(T &&value) noexcept
+    : implementation_simdjson_result_base(std::forward<T>(value), SUCCESS) {}
+
+} // namespace icelake
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H
+/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for icelake */
+/* end file simdjson/generic/amalgamated.h for icelake */
+/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */
+/* begin file simdjson/icelake/end.h */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE
+SIMDJSON_UNTARGET_REGION
+#endif
+
+/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */
+#undef SIMDJSON_IMPLEMENTATION
+/* end file simdjson/icelake/end.h */
+
+#endif // SIMDJSON_ICELAKE_H
+/* end file simdjson/icelake.h */
+#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64)
+/* including simdjson/ppc64.h: #include "simdjson/ppc64.h" */
+/* begin file simdjson/ppc64.h */
+#ifndef SIMDJSON_PPC64_H
+#define SIMDJSON_PPC64_H
+
+/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */
+/* begin file simdjson/ppc64/begin.h */
+/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */
+#define SIMDJSON_IMPLEMENTATION ppc64
+/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */
+/* begin file simdjson/ppc64/base.h */
+#ifndef SIMDJSON_PPC64_BASE_H
+#define SIMDJSON_PPC64_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+/**
+ * Implementation for ALTIVEC (PPC64).
+ */
+namespace ppc64 {
+
+class implementation;
+
+namespace {
+namespace simd {
+template <typename T> struct simd8;
+template <typename T> struct simd8x64;
+} // namespace simd
+} // unnamed namespace
+
+} // namespace ppc64
+} // namespace simdjson
+
+#endif // SIMDJSON_PPC64_BASE_H
+/* end file simdjson/ppc64/base.h */
+/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */
+/* begin file simdjson/ppc64/intrinsics.h */
+#ifndef SIMDJSON_PPC64_INTRINSICS_H
+#define SIMDJSON_PPC64_INTRINSICS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+// This should be the correct header whether
+// you use visual studio or other compilers.
+#include <altivec.h>
+
+// These are defined by altivec.h in GCC toolchain, it is safe to undef them.
+#ifdef bool
+#undef bool
+#endif
+
+#ifdef vector
+#undef vector
+#endif
+
+static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64");
+
+#endif // SIMDJSON_PPC64_INTRINSICS_H
+/* end file simdjson/ppc64/intrinsics.h */
+/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */
+/* begin file simdjson/ppc64/bitmanipulation.h */
+#ifndef SIMDJSON_PPC64_BITMANIPULATION_H
+#define SIMDJSON_PPC64_BITMANIPULATION_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace ppc64 {
+namespace {
+
+// We sometimes call trailing_zero on inputs that are zero,
+// but the algorithms do not end up using the returned value.
+// Sadly, sanitizers are not smart enough to figure it out.
+SIMDJSON_NO_SANITIZE_UNDEFINED
+// This function can be used safely even if not all bytes have been
+// initialized.
+// See issue https://github.com/simdjson/simdjson/issues/1965
+SIMDJSON_NO_SANITIZE_MEMORY
+simdjson_inline int trailing_zeroes(uint64_t input_num) {
+#if SIMDJSON_REGULAR_VISUAL_STUDIO
+  unsigned long ret;
+  // Search the mask data from least significant bit (LSB)
+  // to the most significant bit (MSB) for a set bit (1).
+  _BitScanForward64(&ret, input_num);
+  return (int)ret;
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO
+  return __builtin_ctzll(input_num);
+#endif // SIMDJSON_REGULAR_VISUAL_STUDIO
+}
+
+/* result might be undefined when input_num is zero */
+simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) {
+  return input_num & (input_num - 1);
+}
+
+/* result might be undefined when input_num is zero */
+simdjson_inline int leading_zeroes(uint64_t input_num) {
+#if SIMDJSON_REGULAR_VISUAL_STUDIO
+  unsigned long leading_zero = 0;
+  // Search the mask data from most significant bit (MSB)
+  // to least significant bit (LSB) for a set bit (1).
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+ // #if defined(__LITTLE_ENDIAN__) + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); + // #else + // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); + // #endif + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif +/* end file simdjson/ppc64/bitmask.h */ +/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */ +/* begin file simdjson/ppc64/numberparsing_defs.h */ +#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if defined(__linux__) +#include +#elif defined(__FreeBSD__) +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +// we don't have appropriate instructions, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); +#ifdef __BIG_ENDIAN__ +#if defined(__linux__) + val = bswap_64(val); +#elif defined(__FreeBSD__) + val = bswap64(val); +#endif +#endif + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace ppc64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H +/* end file simdjson/ppc64/numberparsing_defs.h */ +/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */ +/* begin file simdjson/ppc64/simd.h */ +#ifndef SIMDJSON_PPC64_SIMD_H +#define SIMDJSON_PPC64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace 
ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, 
chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
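// Editor's note (illustrative sketch, not part of simdjson): a scalar model of
// what backslash_and_quote::copy_and_find, defined just below, reports. Bit i
// of bs_bits / quote_bits marks a backslash / quote at src[i] over the 32
// bytes that are simultaneously copied to dst.
#include <cstdint>

static void copy_and_find_scalar(const uint8_t *src, uint8_t *dst,
                                 uint32_t &bs_bits, uint32_t &quote_bits) {
  bs_bits = 0;
  quote_bits = 0;
  for (uint32_t i = 0; i < 32; i++) {
    dst[i] = src[i];
    if (src[i] == '\\') { bs_bits |= uint32_t(1) << i; }
    if (src[i] == '"') { quote_bits |= uint32_t(1) << i; }
  }
}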
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/amalgamated.h for ppc64: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for ppc64: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for ppc64 */ +/* including simdjson/generic/jsoncharutils.h for ppc64: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // strictly speaking, static 
inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for ppc64 */ +/* including simdjson/generic/atomparsing.h for ppc64: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace ppc64 { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for ppc64 */ +/* including simdjson/generic/dom_parser_implementation.h for ppc64: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// expectation: sizeof(open_container) = 64/8. 
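// Editor's note (illustrative sketch, not part of simdjson): the atom-parsing
// helpers above rely on memcpy-based 4-byte loads. On a little-endian target
// the first four bytes of "false" load as 0x736C6166 == 1936482662, the
// compile-time constant quoted in the comment; str4ncmp itself stays
// endian-agnostic because both sides of its XOR use the same byte order.
#include <cstdint>
#include <cstring>

static uint32_t four_bytes_as_u32(const char *str) {
  uint32_t val;
  std::memcpy(&val, str, sizeof(uint32_t));
  return val; // four_bytes_as_u32("fals") == 1936482662 on little-endian
}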
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +namespace ppc64 { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for ppc64 */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for ppc64 */ +/* including simdjson/generic/numberparsing.h for ppc64: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
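// Editor's note (illustrative sketch, not part of simdjson): the bit layout
// assembled by to_double() above is the standard IEEE-754 binary64 encoding:
// fraction | (biased exponent << 52) | (sign << 63), with an exponent bias of
// 1023. The values in the trailing comments are ordinary textbook cases.
#include <cstdint>
#include <cstring>

static double pack_binary64(uint64_t fraction, uint64_t biased_exponent, bool negative) {
  uint64_t bits = (fraction & ~(1ULL << 52)) | (biased_exponent << 52) |
                  (uint64_t(negative) << 63);
  double d;
  std::memcpy(&d, &bits, sizeof(d));
  return d;
}
// pack_binary64(0, 1023, false)          == 1.0
// pack_binary64(1ULL << 51, 1023, false) == 1.5
// pack_binary64(0, 1024, true)           == -2.0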
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
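// Editor's note (illustrative sketch, not part of simdjson): the exact fast
// path described near the top of compute_float_64, written as standalone code.
// The cutoffs 22 and 9007199254740991 (2^53 - 1) are the ones quoted above.
#include <cstdint>

static bool clinger_fast_path(int64_t power, uint64_t i, double &d) {
  if (power < -22 || power > 22 || i > 9007199254740991ULL) { return false; }
  double p = 1.0; // 10^|power|, exact because every power of ten up to 1e22 is a double
  for (int64_t k = 0; k < (power < 0 ? -power : power); k++) { p *= 10.0; }
  d = double(i); // exact since i <= 2^53 - 1
  d = (power < 0) ? d / p : d * p;
  return true;
}
// Example: "1.25" reaches the parser as i = 125, power = -2, giving 125 / 100.0.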
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
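// Editor's note (illustrative sketch, not part of simdjson): what the
// full_multiplication call above is described as computing -- the full 128-bit
// product of two 64-bit words, split into "low" and "high" halves. This
// reference model uses the unsigned __int128 extension available on GCC/Clang;
// the library's real implementation is platform-specific.
#include <cstdint>

struct value128_sketch { uint64_t low; uint64_t high; };

static value128_sketch full_multiplication_sketch(uint64_t a, uint64_t b) {
  unsigned __int128 product = static_cast<unsigned __int128>(a) * b;
  return { static_cast<uint64_t>(product), static_cast<uint64_t>(product >> 64) };
}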
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
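// Editor's note (illustrative sketch, not part of simdjson): the SWAR
// expression in is_made_of_eight_digits_fast above is equivalent to this plain
// byte loop -- true exactly when all eight bytes are ASCII digits '0'..'9'.
#include <cstdint>

static bool is_made_of_eight_digits_scalar(const uint8_t *chars) {
  for (int i = 0; i < 8; i++) {
    if (chars[i] < '0' || chars[i] > '9') { return false; }
  }
  return true;
}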
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow. + // This is what forces the skip_double, as well. + error_code error = slow_float_parsing(src, writer); + writer.skip_double(); + return error; + } + // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other + // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331 + // To future reader: we'd love if someone found a better way, or at least could explain this result! + if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) { + // + // Important: smallest_power is such that it leads to a zero value. + // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero + // so something x 10^-343 goes to zero, but not so with something x 10^-342. + static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough"); + // + if((exponent < simdjson::internal::smallest_power) || (i == 0)) { + // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero + WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer); + return SUCCESS; + } else { // (exponent > largest_power) and (i != 0) + // We have, for sure, an infinite value and simdjson refuses to parse infinite values. + return INVALID_NUMBER(src); + } + } + double d; + if (!compute_float_64(exponent, i, negative, d)) { + // we are almost never going to get here. + if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); } + } + WRITE_DOUBLE(d, src, writer); + return SUCCESS; +} + +// for performance analysis, it is sometimes useful to skip parsing +#ifdef SIMDJSON_SKIPNUMBERPARSING + +template +simdjson_inline error_code parse_number(const uint8_t *const, W &writer) { + writer.append_s64(0); // always write zero + return SUCCESS; // always succeeds +} + +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * const src) noexcept { return 0; } +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { return false; } +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; } +#else + +// parse the number at src +// define JSON_TEST_NUMBERS for unit testing +// +// It is assumed that the number is followed by a structural ({,},],[) character +// or a white space character. If that is not the case (e.g., when the JSON +// document is made of a single number), then it is necessary to copy the +// content and append a space before calling this function. +// +// Our objective is accurate parsing (ULP of 0) at high speed. 
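// Editor's note (illustrative sketch, not part of simdjson): the calling
// convention stated above matters when the whole document is a bare number.
// One way a caller could re-buffer such input is shown below; the helper name
// is hypothetical and the padding argument stands in for SIMDJSON_PADDING.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static std::vector<uint8_t> pad_bare_number(const std::string &text, size_t padding) {
  std::vector<uint8_t> buf(text.size() + 1 + padding, 0);
  std::memcpy(buf.data(), text.data(), text.size());
  buf[text.size()] = ' '; // terminating whitespace required by the contract above
  return buf;
}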
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
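// Editor's note (illustrative sketch, not part of simdjson): in the negative
// branch of parse_number above, WRITE_INTEGER(~i+1, ...) negates the unsigned
// magnitude in two's complement, which is what lets "-9223372036854775808"
// (magnitude INT64_MAX + 1) be stored. The cast below assumes the usual
// two's-complement conversion, which C++20 guarantees.
#include <cstdint>

static int64_t negate_magnitude(uint64_t i) {
  return static_cast<int64_t>(~i + 1ULL);
}
// negate_magnitude(1)                       == -1
// negate_magnitude(uint64_t(INT64_MAX))     == -INT64_MAX
// negate_magnitude(uint64_t(INT64_MAX) + 1) == INT64_MIN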
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
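The integer parsers above replace the branchy terminator check shown in the comments with a single load from integer_string_finisher. A stand-in sketch of that idea follows; the enum names and the exact accepted byte set are simplified, not byte-for-byte the table above:

#include <array>
#include <cassert>
#include <cstdint>

enum class code : uint8_t { ok, wrong_type, bad_number };

static std::array<code, 256> make_finisher_table() {
  std::array<code, 256> t;
  t.fill(code::bad_number);
  // Structural characters and whitespace: a valid integer may end here.
  for (unsigned char c : {',', ':', '[', ']', '{', '}', ' ', '\t', '\n', '\r'})
    t[c] = code::ok;
  // '.', 'e', 'E' mean the token is really a float, not an integer.
  for (unsigned char c : {'.', 'e', 'E'})
    t[c] = code::wrong_type;
  return t;
}

static const std::array<code, 256> finisher = make_finisher_table();

// One unpredictable branch becomes one table load on the terminating byte.
inline code classify_terminator(uint8_t b) { return finisher[b]; }

int main() {
  assert(classify_terminator(',') == code::ok);
  assert(classify_terminator('.') == code::wrong_type);
  assert(classify_terminator('x') == code::bad_number);
  return 0;
}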
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
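The check i > uint64_t(INT64_MAX) + uint64_t(negative) in parse_integer above encodes the asymmetric int64_t range: negative values may go one past INT64_MAX in magnitude, positive values may not. A worked sketch of the boundary cases, illustrative only:

#include <cassert>
#include <cstdint>

bool magnitude_fits_int64(uint64_t magnitude, bool negative) {
  // Mirrors the range test above: allow one extra unit of magnitude when negative.
  return magnitude <= uint64_t(INT64_MAX) + uint64_t(negative);
}

int main() {
  assert( magnitude_fits_int64(9223372036854775807ULL, false));  // INT64_MAX
  assert(!magnitude_fits_int64(9223372036854775808ULL, false));  // would overflow a positive int64_t
  assert( magnitude_fits_int64(9223372036854775808ULL, true));   // -9223372036854775808 == INT64_MIN
  assert(!magnitude_fits_int64(9223372036854775809ULL, true));   // below INT64_MIN
  return 0;
}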
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
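get_number_type above decides signed versus unsigned for long integers by comparing a 19-digit value against the string "9223372036854775808", the first magnitude that no longer fits in int64_t. A standalone sketch of just that classification rule; it mirrors the comparison, not the surrounding validation:

#include <cassert>
#include <cstddef>
#include <cstring>

enum class kind { signed_integer, unsigned_integer };

// 'digits' points at the first digit of a non-negative integer token (no sign, no quotes).
kind classify_magnitude(const char *digits, std::size_t digit_count) {
  if (digit_count < 19) return kind::signed_integer;
  // 20+ digits, or a 19-digit value >= "9223372036854775808", is reported as unsigned.
  if (digit_count >= 20 || std::memcmp(digits, "9223372036854775808", 19) >= 0) {
    return kind::unsigned_integer;
  }
  return kind::signed_integer;
}

int main() {
  assert(classify_magnitude("9223372036854775807", 19) == kind::signed_integer);    // INT64_MAX
  assert(classify_magnitude("9223372036854775808", 19) == kind::unsigned_integer);  // INT64_MAX + 1
  assert(classify_magnitude("18446744073709551615", 20) == kind::unsigned_integer); // UINT64_MAX
  return 0;
}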
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
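To make the bookkeeping in the double parsers above concrete: all digits are accumulated into one integer i while exponent tracks the decimal shift, and only then is the binary64 value assembled (by compute_float_64 or the fallback). A tiny worked example using plain arithmetic as a stand-in for the real assembly step:

#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  // "1.25e2": integer and fraction digits give i = 125, two fractional digits give
  // exponent = -2, the explicit exponent adds +2, so the value is 125 * 10^0 = 125.0.
  uint64_t i = 125;
  int64_t exponent = -2 + 2;
  double value = double(i) * std::pow(10.0, double(exponent));
  assert(value == 125.0);
  return 0;
}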
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for ppc64 */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for ppc64 */ +/* end file simdjson/generic/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_H +/* end file simdjson/ppc64.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere.h: #include "simdjson/westmere.h" */ +/* begin file simdjson/westmere.h */ +#ifndef SIMDJSON_WESTMERE_H +#define SIMDJSON_WESTMERE_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
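The implementation_simdjson_result_base boilerplate above is essentially a (value, error) pair with move-aware accessors. A minimal sketch of the same tie()/get() calling pattern, with purely illustrative names:

#include <cassert>
#include <utility>

enum error_code { SUCCESS = 0, NUMBER_ERROR = 1 };

template <typename T>
struct result_pair {
  T first{};
  error_code second{SUCCESS};

  void tie(T &value, error_code &error) && {
    error = second;
    if (!error) { value = std::move(first); }  // only move the value out on success
  }
  error_code get(T &value) && {
    error_code error;
    std::move(*this).tie(value, error);
    return error;
  }
};

int main() {
  double d = 0;
  error_code err = result_pair<double>{3.5, SUCCESS}.get(d);
  assert(err == SUCCESS && d == 3.5);
  return 0;
}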
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). 
+ _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. 
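A portable scalar sketch of the cumulative-XOR semantics that the carry-less multiply below computes in one instruction; bit k of the result is the XOR of input bits 0 through k:

#include <cassert>
#include <cstdint>

uint64_t prefix_xor_scalar(uint64_t bitmask) {
  // Log-step doubling: after these shifts, each bit holds the XOR of all lower-or-equal bits.
  bitmask ^= bitmask << 1;
  bitmask ^= bitmask << 2;
  bitmask ^= bitmask << 4;
  bitmask ^= bitmask << 8;
  bitmask ^= bitmask << 16;
  bitmask ^= bitmask << 32;
  return bitmask;
}

int main() {
  assert(prefix_xor_scalar(0b00100100) == 0b00011100);  // matches the example in the comment above
  return 0;
}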
+ __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
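A scalar reference for what parse_eight_digits_unrolled below computes with SSE multiply-add steps: eight ASCII digits folded into one uint32_t; this sketch is for clarity only and is far slower than the vectorized version:

#include <cassert>
#include <cstdint>

uint32_t parse_eight_digits_scalar(const char *chars) {
  uint32_t result = 0;
  for (int i = 0; i < 8; i++) {
    result = result * 10 + uint32_t(chars[i] - '0');  // assumes exactly eight ASCII digits
  }
  return result;
}

int main() {
  assert(parse_eight_digits_scalar("12345678") == 12345678u);
  return 0;
}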
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ 
other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, 
*this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { 
return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return 
*static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
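+    // (Editor's illustrative note, not part of the original simdjson source.
+    // Assumed example: with mask = 0x0005, bytes 0 and 2 are dropped; the
+    // pshufb above packs the 6 surviving bytes of the low half to the front of
+    // that half, and the combine mask loaded below stitches the two packed
+    // halves together, so the 16 - count_ones(mask) = 14 kept bytes end up
+    // contiguous at the start of the 16 bytes written to 'output'.)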
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + 
simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
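+// (Editor's clarifying note, not part of the original simdjson source:
+// bs_bits and quote_bits are 32-bit masks over the 32 copied bytes, with bit i
+// set when byte i is a backslash or a quote respectively. has_quote_first()
+// computes (bs_bits - 1) & quote_bits: the bits of bs_bits - 1 below the lowest
+// backslash are all set, and its higher set bits can only be other backslash
+// positions, which never coincide with quotes, so the test fires exactly when a
+// quote appears before the first backslash; when bs_bits is zero it degenerates
+// to "any quote at all".)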
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/amalgamated.h for westmere: #include "simdjson/generic/amalgamated.h" */ +/* begin file simdjson/generic/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_DEPENDENCIES_H) +#error simdjson/generic/dependencies.h must be included before simdjson/generic/amalgamated.h! +#endif + +/* including simdjson/generic/base.h for westmere: #include "simdjson/generic/base.h" */ +/* begin file simdjson/generic/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): // If we haven't got an implementation yet, we're in the editor, editing a generic file! Just */ +/* amalgamation skipped (editor-only): // use the most advanced one we can so the most possible stuff can be tested. 
*/ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation_detection.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_IMPLEMENTATION_ICELAKE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_HASWELL */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_WESTMERE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_ARM64 */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_PPC64 */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/begin.h" */ +/* amalgamation skipped (editor-only): #elif SIMDJSON_IMPLEMENTATION_FALLBACK */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/begin.h" */ +/* amalgamation skipped (editor-only): #else */ +/* amalgamation skipped (editor-only): #error "All possible implementations (including fallback) have been disabled! simdjson will not run." */ +/* amalgamation skipped (editor-only): #endif */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_IMPLEMENTATION */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +struct open_container; +class dom_parser_implementation; + +/** + * The type of a JSON number + */ +enum class number_type { + floating_point_number=1, /// a binary64 number + signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + unsigned_integer /// a positive integer larger or equal to 1<<63 +}; + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_BASE_H +/* end file simdjson/generic/base.h for westmere */ +/* including simdjson/generic/jsoncharutils.h for westmere: #include "simdjson/generic/jsoncharutils.h" */ +/* begin file simdjson/generic/jsoncharutils.h for westmere */ +#ifndef SIMDJSON_GENERIC_JSONCHARUTILS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_JSONCHARUTILS_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/jsoncharutils_tables.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace jsoncharutils { + +// return non-zero if not a structural or whitespace char +// zero otherwise +simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace_negated[c]; +} + +simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { + return internal::structural_or_whitespace[c]; +} + +// returns a value with the high 16 bits set if not valid +// otherwise returns the conversion of the 4 hex digits at src into the bottom +// 16 bits of the 32-bit return register +// +// see +// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ +static inline uint32_t hex_to_u32_nocheck( + const uint8_t *src) { // 
strictly speaking, static inline is a C-ism + uint32_t v1 = internal::digit_to_val32[630 + src[0]]; + uint32_t v2 = internal::digit_to_val32[420 + src[1]]; + uint32_t v3 = internal::digit_to_val32[210 + src[2]]; + uint32_t v4 = internal::digit_to_val32[0 + src[3]]; + return v1 | v2 | v3 | v4; +} + +// given a code point cp, writes to c +// the utf-8 code, outputting the length in +// bytes, if the length is zero, the code point +// is invalid +// +// This can possibly be made faster using pdep +// and clz and table lookups, but JSON documents +// have few escaped code points, and the following +// function looks cheap. +// +// Note: we assume that surrogates are treated separately +// +simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) { + if (cp <= 0x7F) { + c[0] = uint8_t(cp); + return 1; // ascii + } + if (cp <= 0x7FF) { + c[0] = uint8_t((cp >> 6) + 192); + c[1] = uint8_t((cp & 63) + 128); + return 2; // universal plane + // Surrogates are treated elsewhere... + //} //else if (0xd800 <= cp && cp <= 0xdfff) { + // return 0; // surrogates // could put assert here + } else if (cp <= 0xFFFF) { + c[0] = uint8_t((cp >> 12) + 224); + c[1] = uint8_t(((cp >> 6) & 63) + 128); + c[2] = uint8_t((cp & 63) + 128); + return 3; + } else if (cp <= 0x10FFFF) { // if you know you have a valid code point, this + // is not needed + c[0] = uint8_t((cp >> 18) + 240); + c[1] = uint8_t(((cp >> 12) & 63) + 128); + c[2] = uint8_t(((cp >> 6) & 63) + 128); + c[3] = uint8_t((cp & 63) + 128); + return 4; + } + // will return 0 when the code point was too large. + return 0; // bad r +} + +#if SIMDJSON_IS_32BITS // _umul128 for x86, arm +// this is a slow emulation routine for 32-bit +// +static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +} // namespace jsoncharutils +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_JSONCHARUTILS_H +/* end file simdjson/generic/jsoncharutils.h for westmere */ +/* including simdjson/generic/atomparsing.h for westmere: #include "simdjson/generic/atomparsing.h" */ +/* begin file simdjson/generic/atomparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_ATOMPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ATOMPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace { +/// @private +namespace atomparsing { + +// The string_to_uint32 is exclusively used to map literal strings to 32-bit values. +// We use memcpy instead of a pointer cast to avoid undefined behaviors since we cannot +// be certain that the character pointer will be properly aligned. +// You might think that using memcpy makes this function expensive, but you'd be wrong. 
+// All decent optimizing compilers (GCC, clang, Visual Studio) will compile string_to_uint32("false"); +// to the compile-time constant 1936482662. +simdjson_inline uint32_t string_to_uint32(const char* str) { uint32_t val; std::memcpy(&val, str, sizeof(uint32_t)); return val; } + + +// Again in str4ncmp we use a memcpy to avoid undefined behavior. The memcpy may appear expensive. +// Yet all decent optimizing compilers will compile memcpy to a single instruction, just about. +simdjson_warn_unused +simdjson_inline uint32_t str4ncmp(const uint8_t *src, const char* atom) { + uint32_t srcval; // we want to avoid unaligned 32-bit loads (undefined in C/C++) + static_assert(sizeof(uint32_t) <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be larger than 4 bytes"); + std::memcpy(&srcval, src, sizeof(uint32_t)); + return srcval ^ string_to_uint32(atom); +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src) { + return (str4ncmp(src, "true") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_true_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_true_atom(src); } + else if (len == 4) { return !str4ncmp(src, "true"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src) { + return (str4ncmp(src+1, "alse") | jsoncharutils::is_not_structural_or_whitespace(src[5])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_false_atom(const uint8_t *src, size_t len) { + if (len > 5) { return is_valid_false_atom(src); } + else if (len == 5) { return !str4ncmp(src+1, "alse"); } + else { return false; } +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src) { + return (str4ncmp(src, "null") | jsoncharutils::is_not_structural_or_whitespace(src[4])) == 0; +} + +simdjson_warn_unused +simdjson_inline bool is_valid_null_atom(const uint8_t *src, size_t len) { + if (len > 4) { return is_valid_null_atom(src); } + else if (len == 4) { return !str4ncmp(src, "null"); } + else { return false; } +} + +} // namespace atomparsing +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ATOMPARSING_H +/* end file simdjson/generic/atomparsing.h for westmere */ +/* including simdjson/generic/dom_parser_implementation.h for westmere: #include "simdjson/generic/dom_parser_implementation.h" */ +/* begin file simdjson/generic/dom_parser_implementation.h for westmere */ +#ifndef SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// expectation: sizeof(open_container) = 64/8. 
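+// (Editor's note, an assumed clarification: tape_index remembers where the
+// opening [ or { was written on the tape so that, once the matching ] or } is
+// reached in stage 2, that entry can be patched with the end position and the
+// element count accumulated in 'count'; packing both fields into 8 bytes keeps
+// the per-depth stack compact.)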
+struct open_container { + uint32_t tape_index; // where, on the tape, does the scope ([,{) begins + uint32_t count; // how many elements in the scope +}; // struct open_container + +static_assert(sizeof(open_container) == 64/8, "Open container must be 64 bits"); + +class dom_parser_implementation final : public internal::dom_parser_implementation { +public: + /** Tape location of each open { or [ */ + std::unique_ptr open_containers{}; + /** Whether each open container is a [ or { */ + std::unique_ptr is_array{}; + /** Buffer passed to stage 1 */ + const uint8_t *buf{}; + /** Length passed to stage 1 */ + size_t len{0}; + /** Document passed to stage 2 */ + dom::document *doc{}; + + inline dom_parser_implementation() noexcept; + inline dom_parser_implementation(dom_parser_implementation &&other) noexcept; + inline dom_parser_implementation &operator=(dom_parser_implementation &&other) noexcept; + dom_parser_implementation(const dom_parser_implementation &) = delete; + dom_parser_implementation &operator=(const dom_parser_implementation &) = delete; + + simdjson_warn_unused error_code parse(const uint8_t *buf, size_t len, dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage1(const uint8_t *buf, size_t len, stage1_mode partial) noexcept final; + simdjson_warn_unused error_code stage2(dom::document &doc) noexcept final; + simdjson_warn_unused error_code stage2_next(dom::document &doc) noexcept final; + simdjson_warn_unused uint8_t *parse_string(const uint8_t *src, uint8_t *dst, bool allow_replacement) const noexcept final; + simdjson_warn_unused uint8_t *parse_wobbly_string(const uint8_t *src, uint8_t *dst) const noexcept final; + inline simdjson_warn_unused error_code set_capacity(size_t capacity) noexcept final; + inline simdjson_warn_unused error_code set_max_depth(size_t max_depth) noexcept final; +private: + simdjson_inline simdjson_warn_unused error_code set_capacity_stage1(size_t capacity); + +}; + +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +namespace westmere { + +inline dom_parser_implementation::dom_parser_implementation() noexcept = default; +inline dom_parser_implementation::dom_parser_implementation(dom_parser_implementation &&other) noexcept = default; +inline dom_parser_implementation &dom_parser_implementation::operator=(dom_parser_implementation &&other) noexcept = default; + +// Leaving these here so they can be inlined if so desired +inline simdjson_warn_unused error_code dom_parser_implementation::set_capacity(size_t capacity) noexcept { + if(capacity > SIMDJSON_MAXSIZE_BYTES) { return CAPACITY; } + // Stage 1 index output + size_t max_structures = SIMDJSON_ROUNDUP_N(capacity, 64) + 2 + 7; + structural_indexes.reset( new (std::nothrow) uint32_t[max_structures] ); + if (!structural_indexes) { _capacity = 0; return MEMALLOC; } + structural_indexes[0] = 0; + n_structural_indexes = 0; + + _capacity = capacity; + return SUCCESS; +} + +inline simdjson_warn_unused error_code dom_parser_implementation::set_max_depth(size_t max_depth) noexcept { + // Stage 2 stacks + open_containers.reset(new (std::nothrow) open_container[max_depth]); + is_array.reset(new (std::nothrow) bool[max_depth]); + if (!is_array || !open_containers) { _max_depth = 0; return MEMALLOC; } + + _max_depth = max_depth; + return SUCCESS; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_DOM_PARSER_IMPLEMENTATION_H +/* end file simdjson/generic/dom_parser_implementation.h for westmere */ +/* including 
simdjson/generic/implementation_simdjson_result_base.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// This is a near copy of include/error.h's implementation_simdjson_result_base, except it doesn't use std::pair +// so we can avoid inlining errors +// TODO reconcile these! +/** + * The result of a simdjson operation that could fail. + * + * Gives the option of reading error codes, or throwing an exception by casting to the desired result. + * + * This is a base class for implementations that want to add functions to the result type for + * chaining. + * + * Override like: + * + * struct simdjson_result : public internal::implementation_simdjson_result_base { + * simdjson_result() noexcept : internal::implementation_simdjson_result_base() {} + * simdjson_result(error_code error) noexcept : internal::implementation_simdjson_result_base(error) {} + * simdjson_result(T &&value) noexcept : internal::implementation_simdjson_result_base(std::forward(value)) {} + * simdjson_result(T &&value, error_code error) noexcept : internal::implementation_simdjson_result_base(value, error) {} + * // Your extra methods here + * } + * + * Then any method returning simdjson_result will be chainable with your methods. + */ +template +struct implementation_simdjson_result_base { + + /** + * Create a new empty result with error = UNINITIALIZED. + */ + simdjson_inline implementation_simdjson_result_base() noexcept = default; + + /** + * Create a new error result. + */ + simdjson_inline implementation_simdjson_result_base(error_code error) noexcept; + + /** + * Create a new successful result. + */ + simdjson_inline implementation_simdjson_result_base(T &&value) noexcept; + + /** + * Create a new result with both things (use if you don't want to branch when creating the result). + */ + simdjson_inline implementation_simdjson_result_base(T &&value, error_code error) noexcept; + + /** + * Move the value and the error to the provided variables. + * + * @param value The variable to assign the value to. May not be set if there is an error. + * @param error The variable to assign the error to. Set to SUCCESS if there is no error. + */ + simdjson_inline void tie(T &value, error_code &error) && noexcept; + + /** + * Move the value to the provided variable. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + simdjson_inline error_code get(T &value) && noexcept; + + /** + * The error. + */ + simdjson_inline error_code error() const noexcept; + +#if SIMDJSON_EXCEPTIONS + + /** + * Get the result value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& value() & noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T&& value() && noexcept(false); + + /** + * Take the result value (move it). + * + * @throw simdjson_error if there was an error. 
+ */ + simdjson_inline T&& take_value() && noexcept(false); + + /** + * Cast to the value (will throw on error). + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline operator T&&() && noexcept(false); + + +#endif // SIMDJSON_EXCEPTIONS + + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline const T& value_unsafe() const& noexcept; + /** + * Get the result value. This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T& value_unsafe() & noexcept; + /** + * Take the result value (move it). This function is safe if and only + * the error() method returns a value that evaluates to false. + */ + simdjson_inline T&& value_unsafe() && noexcept; +protected: + /** users should never directly access first and second. **/ + T first{}; /** Users should never directly access 'first'. **/ + error_code second{UNINITIALIZED}; /** Users should never directly access 'second'. **/ +}; // struct implementation_simdjson_result_base + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_H +/* end file simdjson/generic/implementation_simdjson_result_base.h for westmere */ +/* including simdjson/generic/numberparsing.h for westmere: #include "simdjson/generic/numberparsing.h" */ +/* begin file simdjson/generic/numberparsing.h for westmere */ +#ifndef SIMDJSON_GENERIC_NUMBERPARSING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_NUMBERPARSING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/jsoncharutils.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include +#include + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +#ifdef JSON_TEST_NUMBERS +#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE))) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE))) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE))) +#else +#define INVALID_NUMBER(SRC) (NUMBER_ERROR) +#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE)) +#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE)) +#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE)) +#endif + +namespace { + +// Convert a mantissa, an exponent and a sign bit into an ieee64 double. +// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable). +// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed. +simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) { + double d; + mantissa &= ~(1ULL << 52); + mantissa |= real_exponent << 52; + mantissa |= ((static_cast(negative)) << 63); + std::memcpy(&d, &mantissa, sizeof(d)); + return d; +} + +// Attempts to compute i * 10^(power) exactly; and if "negative" is +// true, negate the result. 
+// This function will only work in some cases, when it does not work, success is +// set to false. This should work *most of the time* (like 99% of the time). +// We assume that power is in the [smallest_power, +// largest_power] interval: the caller is responsible for this check. +simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) { + // we start with a fast path + // It was described in + // Clinger WD. How to read floating point numbers accurately. + // ACM SIGPLAN Notices. 1990 +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." +#endif +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + // We cannot be certain that x/y is rounded to nearest. + if (0 <= power && power <= 22 && i <= 9007199254740991) +#else + if (-22 <= power && power <= 22 && i <= 9007199254740991) +#endif + { + // convert the integer into a double. This is lossless since + // 0 <= i <= 2^53 - 1. + d = double(i); + // + // The general idea is as follows. + // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then + // 1) Both s and p can be represented exactly as 64-bit floating-point + // values + // (binary64). + // 2) Because s and p can be represented exactly as floating-point values, + // then s * p + // and s / p will produce correctly rounded values. + // + if (power < 0) { + d = d / simdjson::internal::power_of_ten[-power]; + } else { + d = d * simdjson::internal::power_of_ten[power]; + } + if (negative) { + d = -d; + } + return true; + } + // When 22 < power && power < 22 + 16, we could + // hope for another, secondary fast path. It was + // described by David M. Gay in "Correctly rounded + // binary-decimal and decimal-binary conversions." (1990) + // If you need to compute i * 10^(22 + x) for x < 16, + // first compute i * 10^x, if you know that result is exact + // (e.g., when i * 10^x < 2^53), + // then you can still proceed and do (i * 10^x) * 10^22. + // Is this worth your time? + // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53) + // for this second fast path to work. + // If you you have 22 < power *and* power < 22 + 16, and then you + // optimistically compute "i * 10^(x-22)", there is still a chance that you + // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of + // this optimization maybe less common than we would like. Source: + // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html + + // The fast path has now failed, so we are failing back on the slower path. + + // In the slow path, we need to adjust i so that it is > 1<<63 which is always + // possible, except if i == 0, so we handle i == 0 separately. + if(i == 0) { + d = negative ? -0.0 : 0.0; + return true; + } + + + // The exponent is 1024 + 63 + power + // + floor(log(5**power)/log(2)). + // The 1024 comes from the ieee64 standard. + // The 63 comes from the fact that we use a 64-bit word. + // + // Computing floor(log(5**power)/log(2)) could be + // slow. Instead we use a fast function. 
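+  // (Editor's illustrative check, not part of the original source: for
+  // power = 10 the expression below gives ((152170 + 65536) * 10) >> 16 = 33,
+  // which matches floor(log2(5^10)) + 10 = 23 + 10 = 33.)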
+ // + // For power in (-400,350), we have that + // (((152170 + 65536) * power ) >> 16); + // is equal to + // floor(log(5**power)/log(2)) + power when power >= 0 + // and it is equal to + // ceil(log(5**-power)/log(2)) + power when power < 0 + // + // The 65536 is (1<<16) and corresponds to + // (65536 * power) >> 16 ---> power + // + // ((152170 * power ) >> 16) is equal to + // floor(log(5**power)/log(2)) + // + // Note that this is not magic: 152170/(1<<16) is + // approximatively equal to log(5)/log(2). + // The 1<<16 value is a power of two; we could use a + // larger power of 2 if we wanted to. + // + int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63; + + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(i); + i <<= lz; + + + // We are going to need to do some 64-bit arithmetic to get a precise product. + // We use a table lookup approach. + // It is safe because + // power >= smallest_power + // and power <= largest_power + // We recover the mantissa of the power, it has a leading 1. It is always + // rounded down. + // + // We want the most significant 64 bits of the product. We know + // this will be non-zero because the most significant bit of i is + // 1. + const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power); + // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.) + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 firstproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index]); + // Both i and power_of_five_128[index] have their most significant bit set to 1 which + // implies that the either the most or the second most significant bit of the product + // is 1. We pack values in this manner for efficiency reasons: it maximizes the use + // we make of the product. It also makes it easy to reason about the product: there + // is 0 or 1 leading zero in the product. + + // Unless the least significant 9 bits of the high (64-bit) part of the full + // product are all 1s, then we know that the most significant 55 bits are + // exact and no further work is needed. Having 55 bits is necessary because + // we need 53 bits for the mantissa but we have to have one rounding bit and + // we can waste a bit if the most significant bit of the product is zero. + if((firstproduct.high & 0x1FF) == 0x1FF) { + // We want to compute i * 5^q, but only care about the top 55 bits at most. + // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing + // the full computation is wasteful. So we do what is called a "truncated + // multiplication". + // We take the most significant 64-bits, and we put them in + // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q + // to the desired approximation using one multiplication. Sometimes it does not suffice. + // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and + // then we get a better approximation to i * 5^q. In very rare cases, even that + // will not suffice, though it is seemingly very hard to find such a scenario. + // + // That's for when q>=0. 
The logic for q<0 is somewhat similar but it is somewhat + // more complicated. + // + // There is an extra layer of complexity in that we need more than 55 bits of + // accuracy in the round-to-even scenario. + // + // The full_multiplication function computes the 128-bit product of two 64-bit words + // with a returned value of type value128 with a "low component" corresponding to the + // 64-bit least significant bits of the product and with a "high component" corresponding + // to the 64-bit most significant bits of the product. + simdjson::internal::value128 secondproduct = full_multiplication(i, simdjson::internal::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if(secondproduct.high > firstproduct.low) { firstproduct.high++; } + // At this point, we might need to add at most one to firstproduct, but this + // can only change the value of firstproduct.high if firstproduct.low is maximal. + if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) { + // This is very unlikely, but if so, we need to do much more work! + return false; + } + } + uint64_t lower = firstproduct.low; + uint64_t upper = firstproduct.high; + // The final mantissa should be 53 bits with a leading 1. + // We shift it so that it occupies 54 bits with a leading 1. + /////// + uint64_t upperbit = upper >> 63; + uint64_t mantissa = upper >> (upperbit + 9); + lz += int(1 ^ upperbit); + + // Here we have mantissa < (1<<54). + int64_t real_exponent = exponent - lz; + if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal? + // Here have that real_exponent <= 0 so -real_exponent >= 0 + if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure. + d = negative ? -0.0 : 0.0; + return true; + } + // next line is safe because -real_exponent + 1 < 0 + mantissa >>= -real_exponent + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + mantissa += (mantissa & 1); // round up + mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1; + d = to_double(mantissa, real_exponent, negative); + return true; + } + // We have to round to even. The "to even" part + // is only a problem when we are right in between two floats + // which we guard against. + // If we have lots of trailing zeros, we may fall right between two + // floating-point values. + // + // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54] + // times a power of two. That is, it is right between a number with binary significand + // m and another number with binary significand m+1; and it must be the case + // that it cannot be represented by a float itself. + // + // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p. + // Recall that 10^q = 5^q * 2^q. + // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that + // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23. 
+ // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so + // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have + // 2^{53} x 5^{-q} < 2^{64}. + // Hence we have 5^{-q} < 2^{11}$ or q>= -4. + // + // We require lower <= 1 and not lower == 0 because we could not prove that + // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test. + if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) { + if((mantissa << (upperbit + 64 - 53 - 2)) == upper) { + mantissa &= ~1; // flip it so that we do not round up + } + } + + mantissa += mantissa & 1; + mantissa >>= 1; + + // Here we have mantissa < (1<<53), unless there was an overflow + if (mantissa >= (1ULL << 53)) { + ////////// + // This will happen when parsing values such as 7.2057594037927933e+16 + //////// + mantissa = (1ULL << 52); + real_exponent++; + } + mantissa &= ~(1ULL << 52); + // we have to check that real_exponent is in range, otherwise we bail out + if (simdjson_unlikely(real_exponent > 2046)) { + // We have an infinite value!!! We could actually throw an error here if we could. + return false; + } + d = to_double(mantissa, real_exponent, negative); + return true; +} + +// We call a fallback floating-point parser that might be slow. Note +// it will accept JSON numbers, but the JSON spec. is more restrictive so +// before you call parse_float_fallback, you need to have validated the input +// string with the JSON grammar. +// It will return an error (false) if the parsed number is infinite. +// The string parsing itself always succeeds. We know that there is at least +// one digit. +static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). + return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) { + *outDouble = simdjson::internal::from_chars(reinterpret_cast(ptr), reinterpret_cast(end_ptr)); + // We do not accept infinite values. + + // Detecting finite values in a portable manner is ridiculously hard, ideally + // we would want to do: + // return !std::isfinite(*outDouble); + // but that mysteriously fails under legacy/old libc++ libraries, see + // https://github.com/simdjson/simdjson/issues/1286 + // + // Therefore, fall back to this solution (the extra parens are there + // to handle that max may be a macro on windows). 
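+  // (Editor's note: on Windows, <windows.h> defines min/max function-like
+  // macros unless NOMINMAX is set, so the parenthesized form
+  // (std::numeric_limits<double>::max)() prevents unwanted macro expansion.)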
+ return !(*outDouble > (std::numeric_limits::max)() || *outDouble < std::numeric_limits::lowest()); +} + +// check quickly whether the next 8 chars are made of digits +// at a glance, it looks better than Mula's +// http://0x80.pl/articles/swar-digits-validate.html +simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) { + uint64_t val; + // this can read up to 7 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7"); + std::memcpy(&val, chars, 8); + // a branchy method might be faster: + // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) + // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == + // 0x3030303030303030); + return (((val & 0xF0F0F0F0F0F0F0F0) | + (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == + 0x3333333333333333); +} + +template +SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later +simdjson_inline bool parse_digit(const uint8_t c, I &i) { + const uint8_t digit = static_cast(c - '0'); + if (digit > 9) { + return false; + } + // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication + i = 10 * i + digit; // might overflow, we will handle the overflow later + return true; +} + +simdjson_inline error_code parse_decimal_after_separator(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) { + // we continue with the fiction that we have an integer. If the + // floating point number is representable as x * 10^z for some integer + // z that fits in 53 bits, then we will be able to convert back the + // the integer into a float in a lossless manner. + const uint8_t *const first_after_period = p; + +#ifdef SIMDJSON_SWAR_NUMBER_PARSING +#if SIMDJSON_SWAR_NUMBER_PARSING + // this helps if we have lots of decimals! + // this turns out to be frequent enough. + if (is_made_of_eight_digits_fast(p)) { + i = i * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + } +#endif // SIMDJSON_SWAR_NUMBER_PARSING +#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING + // Unrolling the first digit makes a small difference on some implementations (e.g. westmere) + if (parse_digit(*p, i)) { ++p; } + while (parse_digit(*p, i)) { p++; } + exponent = first_after_period - p; + // Decimal without digits (123.) is illegal + if (exponent == 0) { + return INVALID_NUMBER(src); + } + return SUCCESS; +} + +simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) { + // Exp Sign: -123.456e[-]78 + bool neg_exp = ('-' == *p); + if (neg_exp || '+' == *p) { p++; } // Skip + as well + + // Exponent: -123.456e-[78] + auto start_exp = p; + int64_t exp_number = 0; + while (parse_digit(*p, exp_number)) { ++p; } + // It is possible for parse_digit to overflow. + // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN. + // Thus we *must* check for possible overflow before we negate exp_number. + + // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into + // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may + // not oblige and may, in fact, generate two distinct paths in any case. It might be + // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off + // instructions for a simdjson_likely branch, an unconclusive gain. + + // If there were no digits, it's an error. 
+ if (simdjson_unlikely(p == start_exp)) { + return INVALID_NUMBER(src); + } + // We have a valid positive exponent in exp_number at this point, except that + // it may have overflowed. + + // If there were more than 18 digits, we may have overflowed the integer. We have to do + // something!!!! + if (simdjson_unlikely(p > start_exp+18)) { + // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow + while (*start_exp == '0') { start_exp++; } + // 19 digits could overflow int64_t and is kind of absurd anyway. We don't + // support exponents smaller than -999,999,999,999,999,999 and bigger + // than 999,999,999,999,999,999. + // We can truncate. + // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before + // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could + // truncate at 324. + // Note that there is no reason to fail per se at this point in time. + // E.g., 0e999999999999999999999 is a fine number. + if (p > start_exp+18) { exp_number = 999999999999999999; } + } + // At this point, we know that exp_number is a sane, positive, signed integer. + // It is <= 999,999,999,999,999,999. As long as 'exponent' is in + // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent' + // is bounded in magnitude by the size of the JSON input, we are fine in this universe. + // To sum it up: the next line should never overflow. + exponent += (neg_exp ? -exp_number : exp_number); + return SUCCESS; +} + +simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) { + // It is possible that the integer had an overflow. + // We have to handle the case where we have 0.0000somenumber. + const uint8_t *start = start_digits; + while ((*start == '0') || (*start == '.')) { ++start; } + // we over-decrement by one when there is a '.' + return digit_count - size_t(start - start_digits); +} + +} // unnamed namespace + +/** @private */ +template +error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) { + double d; + if (parse_float_fallback(src, &d)) { + writer.append_double(d); + return SUCCESS; + } + return INVALID_NUMBER(src); +} + +/** @private */ +template +simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) { + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon in practice. + // + // 9999999999999999999 < 2**64 so we can accommodate 19 digits. + // If we have a decimal separator, then digit_count - 1 is the number of digits, but we + // may not have a decimal separator! + if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) { + // Ok, chances are good that we had an overflow! + // this is almost never going to get called!!! + // we start anew, going slowly!!! + // This will happen in the following examples: + // 10000000000000000000000000000000000000000000e+308 + // 3.1415926535897932384626433832795028841971693993751 + // + // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens + // because slow_float_parsing is a non-inlined function. If we passed our writer reference to + // it, it would force it to be stored in memory, preventing the compiler from picking it apart + // and putting into registers. i.e. 
if we pass it as reference, it gets slow.
+    // This is what forces the skip_double, as well.
+    error_code error = slow_float_parsing(src, writer);
+    writer.skip_double();
+    return error;
+  }
+  // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
+  // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
+  // To future reader: we'd love if someone found a better way, or at least could explain this result!
+  if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
+    //
+    // Important: smallest_power is such that it leads to a zero value.
+    // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
+    // so something x 10^-343 goes to zero, but not so with something x 10^-342.
+    static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
+    //
+    if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
+      // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
+      WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer);
+      return SUCCESS;
+    } else { // (exponent > largest_power) and (i != 0)
+      // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
+      return INVALID_NUMBER(src);
+    }
+  }
+  double d;
+  if (!compute_float_64(exponent, i, negative, d)) {
+    // we are almost never going to get here.
+    if (!parse_float_fallback(src, &d)) { return INVALID_NUMBER(src); }
+  }
+  WRITE_DOUBLE(d, src, writer);
+  return SUCCESS;
+}
+
+// for performance analysis, it is sometimes useful to skip parsing
+#ifdef SIMDJSON_SKIPNUMBERPARSING
+
+template<typename W>
+simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
+  writer.append_s64(0);        // always write zero
+  return SUCCESS;              // always succeeds
+}
+
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; }
+simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; }
+simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; }
+simdjson_unused simdjson_inline simdjson_result<number_type> get_number_type(const uint8_t * src) noexcept { return number_type::signed_integer; }
+#else
+
+// parse the number at src
+// define JSON_TEST_NUMBERS for unit testing
+//
+// It is assumed that the number is followed by a structural ({,},],[) character
+// or a white space character. If that is not the case (e.g., when the JSON
+// document is made of a single number), then it is necessary to copy the
+// content and append a space before calling this function.
+//
+// Our objective is accurate parsing (ULP of 0) at high speed.
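+//
+// A minimal caller-side sketch of the padding rule above (hypothetical names, assuming the
+// document is a bare number held in json/json_len):
+//
+//   std::string buffer(json, json_len);
+//   buffer.push_back(' ');                  // guarantee a terminating white space character
+//   buffer.append(SIMDJSON_PADDING, ' ');   // keep the usual read-ahead padding
+//   // the parser can now be called on reinterpret_cast<const uint8_t *>(buffer.data())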
+template +simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { + + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); } + + // + // Handle floats if there is a . or e (or both) + // + int64_t exponent = 0; + bool is_float = false; + if ('.' == *p) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_decimal_after_separator(src, p, i, exponent) ); + digit_count = int(p - start_digits); // used later to guard against overflows + } + if (('e' == *p) || ('E' == *p)) { + is_float = true; + ++p; + SIMDJSON_TRY( parse_exponent(src, p, exponent) ); + } + if (is_float) { + const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(*p); + SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) ); + if (dirty_end) { return INVALID_NUMBER(src); } + return SUCCESS; + } + + // The longest negative 64-bit number is 19 digits. + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + size_t longest_digit_count = negative ? 19 : 20; + if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); } + if (digit_count == longest_digit_count) { + if (negative) { + // Anything negative above INT64_MAX+1 is invalid + if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); } + WRITE_INTEGER(~i+1, src, writer); + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); } + } + + // Write unsigned if it doesn't fit in a signed integer. + if (i > uint64_t(INT64_MAX)) { + WRITE_UNSIGNED(i, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } + return SUCCESS; +} + +// Inlineable functions +namespace { + +// This table can be used to characterize the final character of an integer +// string. For JSON structural character and allowable white space characters, +// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise +// we return NUMBER_ERROR. 
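+// For instance, for the text "1," the finishing character ',' maps to SUCCESS, for "1." or
+// "1e" the '.' and 'e' map to INCORRECT_TYPE (the value is a float, not an integer), and
+// for "1x" the 'x' maps to NUMBER_ERROR.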
+// Optimization note: we could easily reduce the size of the table by half (to 128) +// at the cost of an extra branch. +// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits): +static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast"); +static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast"); + +const uint8_t integer_string_finisher[256] = { + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, + SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, 
NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, + NUMBER_ERROR}; + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + + +// Parse any number from 0 to 18,446,744,073,709,551,615 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept { + const uint8_t *p = src; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. 
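+  // For example, "0" is accepted, while "00" and "01" are rejected below as NUMBER_ERROR.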
+ // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". + // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from 0 to 18,446,744,073,709,551,615 +simdjson_unused simdjson_inline simdjson_result parse_unsigned_in_string(const uint8_t * const src) noexcept { + const uint8_t *p = src + 1; + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // The longest positive 64-bit number is 20 digits. + // We do it this way so we don't trigger this branch unless we must. + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > 20)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if (*p != '"') { return NUMBER_ERROR; } + + if (digit_count == 20) { + // Positive overflow check: + // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the + // biggest uint64_t. + // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX. + // If we got here, it's a 20 digit number starting with the digit "1". 
+ // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller + // than 1,553,255,926,290,448,384. + // - That is smaller than the smallest possible 20-digit number the user could write: + // 10,000,000,000,000,000,000. + // - Therefore, if the number is positive and lower than that, it's overflow. + // - The value we are looking at is less than or equal to INT64_MAX. + // + // Note: we use src[1] and not src[0] because src[0] is the quote character in this + // instance. + if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; } + } + + return i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while (parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept { + // + // Check for minus sign + // + if(src == src_end) { return NUMBER_ERROR; } + bool negative = (*src == '-'); + const uint8_t *p = src + uint8_t(negative); + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = p; + uint64_t i = 0; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. 
+ size_t digit_count = size_t(p - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*p)) { + // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 +simdjson_unused simdjson_inline simdjson_result parse_integer_in_string(const uint8_t *src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare + const uint8_t *const start_digits = src; + uint64_t i = 0; + while (parse_digit(*src, i)) { src++; } + + // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error. + // Optimization note: size_t is expected to be unsigned. + size_t digit_count = size_t(src - start_digits); + // We go from + // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807 + // so we can never represent numbers that have more than 19 digits. + size_t longest_digit_count = 19; + // Optimization note: the compiler can probably merge + // ((digit_count == 0) || (digit_count > longest_digit_count)) + // into a single branch since digit_count is unsigned. + if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; } + // Here digit_count > 0. + if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; } + // We can do the following... + // if (!jsoncharutils::is_structural_or_whitespace(*src)) { + // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR; + // } + // as a single table lookup: + if(*src != '"') { return NUMBER_ERROR; } + // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX. + // Performance note: This check is only needed when digit_count == longest_digit_count but it is + // so cheap that we might as well always make it. + if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; } + return negative ? (~i+1) : i; +} + +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. 
+ // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { + return (*src == '-'); +} + +simdjson_unused simdjson_inline simdjson_result is_integer(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { return true; } + return false; +} + +simdjson_unused simdjson_inline simdjson_result get_number_type(const uint8_t * src) noexcept { + bool negative = (*src == '-'); + src += uint8_t(negative); + const uint8_t *p = src; + while(static_cast(*p - '0') <= 9) { p++; } + if ( p == src ) { return NUMBER_ERROR; } + if (jsoncharutils::is_structural_or_whitespace(*p)) { + // We have an integer. + // If the number is negative and valid, it must be a signed integer. + if(negative) { return number_type::signed_integer; } + // We want values larger or equal to 9223372036854775808 to be unsigned + // integers, and the other values to be signed integers. + int digit_count = int(p - src); + if(digit_count >= 19) { + const uint8_t * smaller_big_integer = reinterpret_cast("9223372036854775808"); + if((digit_count >= 20) || (memcmp(src, smaller_big_integer, 19) >= 0)) { + return number_type::unsigned_integer; + } + } + return number_type::signed_integer; + } + // Hopefully, we have 'e' or 'E' or '.'. 
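+  // For example (each input followed by white space so the scan terminates):
+  //   "-1"                   -> signed_integer
+  //   "9223372036854775807"  -> signed_integer   (INT64_MAX)
+  //   "9223372036854775808"  -> unsigned_integer (INT64_MAX + 1)
+  //   "1e3" or "1.5"         -> floating_point_number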
+ return number_type::floating_point_number; +} + +// Never read at src_end or beyond +simdjson_unused simdjson_inline simdjson_result parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept { + if(src == src_end) { return NUMBER_ERROR; } + // + // Check for minus sign + // + bool negative = (*src == '-'); + src += uint8_t(negative); + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + if(p == src_end) { return NUMBER_ERROR; } + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while ((p != src_end) && parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely((p != src_end) && (*p == '.'))) { + p++; + const uint8_t *start_decimal_digits = p; + if ((p == src_end) || !parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while ((p != src_end) && parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. + overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if ((p != src_end) && (*p == 'e' || *p == 'E')) { + p++; + if(p == src_end) { return NUMBER_ERROR; } + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while ((p != src_end) && parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(*p)) { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), src_end, &d)) { + return NUMBER_ERROR; + } + return d; +} + +simdjson_unused simdjson_inline simdjson_result parse_double_in_string(const uint8_t * src) noexcept { + // + // Check for minus sign + // + bool negative = (*(src + 1) == '-'); + src += uint8_t(negative) + 1; + + // + // Parse the integer part. + // + uint64_t i = 0; + const uint8_t *p = src; + p += parse_digit(*p, i); + bool leading_zero = (i == 0); + while (parse_digit(*p, i)) { p++; } + // no integer digits, or 0123 (zero must be solo) + if ( p == src ) { return INCORRECT_TYPE; } + if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; } + + // + // Parse the decimal part. + // + int64_t exponent = 0; + bool overflow; + if (simdjson_likely(*p == '.')) { + p++; + const uint8_t *start_decimal_digits = p; + if (!parse_digit(*p, i)) { return NUMBER_ERROR; } // no decimal digits + p++; + while (parse_digit(*p, i)) { p++; } + exponent = -(p - start_decimal_digits); + + // Overflow check. More than 19 digits (minus the decimal) may be overflow. 
+ overflow = p-src-1 > 19; + if (simdjson_unlikely(overflow && leading_zero)) { + // Skip leading 0.00000 and see if it still overflows + const uint8_t *start_digits = src + 2; + while (*start_digits == '0') { start_digits++; } + overflow = start_digits-src > 19; + } + } else { + overflow = p-src > 19; + } + + // + // Parse the exponent + // + if (*p == 'e' || *p == 'E') { + p++; + bool exp_neg = *p == '-'; + p += exp_neg || *p == '+'; + + uint64_t exp = 0; + const uint8_t *start_exp_digits = p; + while (parse_digit(*p, exp)) { p++; } + // no exp digits, or 20+ exp digits + if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; } + + exponent += exp_neg ? 0-exp : exp; + } + + if (*p != '"') { return NUMBER_ERROR; } + + overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power; + + // + // Assemble (or slow-parse) the float + // + double d; + if (simdjson_likely(!overflow)) { + if (compute_float_64(exponent, i, negative, d)) { return d; } + } + if (!parse_float_fallback(src - uint8_t(negative), &d)) { + return NUMBER_ERROR; + } + return d; +} + +} // unnamed namespace +#endif // SIMDJSON_SKIPNUMBERPARSING + +} // namespace numberparsing + +inline std::ostream& operator<<(std::ostream& out, number_type type) noexcept { + switch (type) { + case number_type::signed_integer: out << "integer in [-9223372036854775808,9223372036854775808)"; break; + case number_type::unsigned_integer: out << "unsigned integer in [9223372036854775808,18446744073709551616)"; break; + case number_type::floating_point_number: out << "floating-point number (binary64)"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_NUMBERPARSING_H +/* end file simdjson/generic/numberparsing.h for westmere */ + +/* including simdjson/generic/implementation_simdjson_result_base-inl.h for westmere: #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* begin file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { + +// +// internal::implementation_simdjson_result_base inline implementation +// + +template +simdjson_inline void implementation_simdjson_result_base::tie(T &value, error_code &error) && noexcept { + error = this->second; + if (!error) { + value = std::forward>(*this).first; + } +} + +template +simdjson_warn_unused simdjson_inline error_code implementation_simdjson_result_base::get(T &value) && noexcept { + error_code error; + std::forward>(*this).tie(value, error); + return error; +} + +template +simdjson_inline error_code implementation_simdjson_result_base::error() const noexcept { + return this->second; +} + +#if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return this->first; +} + +template +simdjson_inline T&& 
implementation_simdjson_result_base::value() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::take_value() && noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::operator T&&() && noexcept(false) { + return std::forward>(*this).take_value(); +} + +#endif // SIMDJSON_EXCEPTIONS + +template +simdjson_inline const T& implementation_simdjson_result_base::value_unsafe() const& noexcept { + return this->first; +} + +template +simdjson_inline T& implementation_simdjson_result_base::value_unsafe() & noexcept { + return this->first; +} + +template +simdjson_inline T&& implementation_simdjson_result_base::value_unsafe() && noexcept { + return std::forward(this->first); +} + +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value, error_code error) noexcept + : first{std::forward(value)}, second{error} {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(error_code error) noexcept + : implementation_simdjson_result_base(T{}, error) {} +template +simdjson_inline implementation_simdjson_result_base::implementation_simdjson_result_base(T &&value) noexcept + : implementation_simdjson_result_base(std::forward(value), SUCCESS) {} + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_IMPLEMENTATION_SIMDJSON_RESULT_BASE_INL_H +/* end file simdjson/generic/implementation_simdjson_result_base-inl.h for westmere */ +/* end file simdjson/generic/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_H +/* end file simdjson/westmere.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +#endif // SIMDJSON_BUILTIN_H +/* end file simdjson/builtin.h */ +/* skipped duplicate #include "simdjson/builtin/base.h" */ + +/* including simdjson/generic/ondemand/dependencies.h: #include "simdjson/generic/ondemand/dependencies.h" */ +/* begin file simdjson/generic/ondemand/dependencies.h */ +#ifdef SIMDJSON_CONDITIONAL_INCLUDE +#error simdjson/generic/ondemand/dependencies.h must be included before defining SIMDJSON_CONDITIONAL_INCLUDE! +#endif + +#ifndef SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +#define SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H + +// Internal headers needed for ondemand generics. +// All includes not under simdjson/generic/ondemand must be here! +// Otherwise, amalgamation will fail. 
+/* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* skipped duplicate #include "simdjson/implementation.h" */ +/* skipped duplicate #include "simdjson/padded_string.h" */ +/* skipped duplicate #include "simdjson/padded_string_view.h" */ +/* skipped duplicate #include "simdjson/internal/dom_parser_implementation.h" */ + +#endif // SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H +/* end file simdjson/generic/ondemand/dependencies.h */ + +/* defining SIMDJSON_CONDITIONAL_INCLUDE */ +#define SIMDJSON_CONDITIONAL_INCLUDE + +#if SIMDJSON_BUILTIN_IMPLEMENTATION_IS(arm64) +/* including simdjson/arm64/ondemand.h: #include "simdjson/arm64/ondemand.h" */ +/* begin file simdjson/arm64/ondemand.h */ +#ifndef SIMDJSON_ARM64_ONDEMAND_H +#define SIMDJSON_ARM64_ONDEMAND_H + +/* including simdjson/arm64/begin.h: #include "simdjson/arm64/begin.h" */ +/* begin file simdjson/arm64/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "arm64" */ +#define SIMDJSON_IMPLEMENTATION arm64 +/* including simdjson/arm64/base.h: #include "simdjson/arm64/base.h" */ +/* begin file simdjson/arm64/base.h */ +#ifndef SIMDJSON_ARM64_BASE_H +#define SIMDJSON_ARM64_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Implementation for NEON (ARMv8). + */ +namespace arm64 { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BASE_H +/* end file simdjson/arm64/base.h */ +/* including simdjson/arm64/intrinsics.h: #include "simdjson/arm64/intrinsics.h" */ +/* begin file simdjson/arm64/intrinsics.h */ +#ifndef SIMDJSON_ARM64_INTRINSICS_H +#define SIMDJSON_ARM64_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// This should be the correct header whether +// you use visual studio or other compilers. +#include + +static_assert(sizeof(uint8x16_t) <= simdjson::SIMDJSON_PADDING, "insufficient padding for arm64"); + +#endif // SIMDJSON_ARM64_INTRINSICS_H +/* end file simdjson/arm64/intrinsics.h */ +/* including simdjson/arm64/bitmanipulation.h: #include "simdjson/arm64/bitmanipulation.h" */ +/* begin file simdjson/arm64/bitmanipulation.h */ +#ifndef SIMDJSON_ARM64_BITMANIPULATION_H +#define SIMDJSON_ARM64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. 
+// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int count_ones(uint64_t input_num) { + return vaddv_u8(vcnt_u8(vcreate_u8(input_num))); +} + + +#if defined(__GNUC__) // catches clang and gcc +/** + * ARM has a fast 64-bit "bit reversal function" that is handy. However, + * it is not generally available as an intrinsic function under Visual + * Studio (though this might be changing). Even under clang/gcc, we + * apparently need to invoke inline assembly. + */ +/* + * We use SIMDJSON_PREFER_REVERSE_BITS as a hint that algorithms that + * work well with bit reversal may use it. + */ +#define SIMDJSON_PREFER_REVERSE_BITS 1 + +/* reverse the bits */ +simdjson_inline uint64_t reverse_bits(uint64_t input_num) { + uint64_t rev_bits; + __asm("rbit %0, %1" : "=r"(rev_bits) : "r"(input_num)); + return rev_bits; +} + +/** + * Flips bit at index 63 - lz. Thus if you have 'leading_zeroes' leading zeroes, + * then this will set to zero the leading bit. It is possible for leading_zeroes to be + * greating or equal to 63 in which case we trigger undefined behavior, but the output + * of such undefined behavior is never used. + **/ +SIMDJSON_NO_SANITIZE_UNDEFINED +simdjson_inline uint64_t zero_leading_bit(uint64_t rev_bits, int leading_zeroes) { + return rev_bits ^ (uint64_t(0x8000000000000000) >> leading_zeroes); +} + +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_BITMANIPULATION_H +/* end file simdjson/arm64/bitmanipulation.h */ +/* including simdjson/arm64/bitmask.h: #include "simdjson/arm64/bitmask.h" */ +/* begin file simdjson/arm64/bitmask.h */ +#ifndef SIMDJSON_ARM64_BITMASK_H +#define SIMDJSON_ARM64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. 
+// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + ///////////// + // We could do this with PMULL, but it is apparently slow. + // + //#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension + //return vmull_p64(-1ULL, bitmask); + //#else + // Analysis by @sebpop: + // When diffing the assembly for src/stage1_find_marks.cpp I see that the eors are all spread out + // in between other vector code, so effectively the extra cycles of the sequence do not matter + // because the GPR units are idle otherwise and the critical path is on the FP side. + // Also the PMULL requires two extra fmovs: GPR->FP (3 cycles in N1, 5 cycles in A72 ) + // and FP->GPR (2 cycles on N1 and 5 cycles on A72.) + /////////// + bitmask ^= bitmask << 1; + bitmask ^= bitmask << 2; + bitmask ^= bitmask << 4; + bitmask ^= bitmask << 8; + bitmask ^= bitmask << 16; + bitmask ^= bitmask << 32; + return bitmask; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif +/* end file simdjson/arm64/bitmask.h */ +/* including simdjson/arm64/numberparsing_defs.h: #include "simdjson/arm64/numberparsing_defs.h" */ +/* begin file simdjson/arm64/numberparsing_defs.h */ +#ifndef SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +#define SIMDJSON_ARM64_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +#if _M_ARM64 +// __umulh requires intrin.h +#include +#endif // _M_ARM64 + +namespace simdjson { +namespace arm64 { +namespace numberparsing { + +// we don't have SSE, so let us use a scalar function +// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/ +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + uint64_t val; + std::memcpy(&val, chars, sizeof(uint64_t)); + val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8; + val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16; + return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32); +} + +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace arm64 +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ARM64_NUMBERPARSING_DEFS_H +/* end file simdjson/arm64/numberparsing_defs.h */ +/* including simdjson/arm64/simd.h: #include "simdjson/arm64/simd.h" */ +/* begin file simdjson/arm64/simd.h */ +#ifndef SIMDJSON_ARM64_SIMD_H +#define SIMDJSON_ARM64_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* 
amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { +namespace simd { + +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO +namespace { +// Start of private section with Visual Studio workaround + + +/** + * make_uint8x16_t initializes a SIMD register (uint8x16_t). + * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...} + * is not recognized under Visual Studio! This is a workaround. + * Using a std::initializer_list as a parameter resulted in + * inefficient code. With the current approach, if the parameters are + * compile-time constants, + * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}. + * You should not use this function except for compile-time constants: + * it is not efficient. + */ +simdjson_inline uint8x16_t make_uint8x16_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8, + uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12, + uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) { + // Doing a load like so end ups generating worse code. + // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_u8(array); + uint8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_u8(x1, x, 0); + x = vsetq_lane_u8(x2, x, 1); + x = vsetq_lane_u8(x3, x, 2); + x = vsetq_lane_u8(x4, x, 3); + x = vsetq_lane_u8(x5, x, 4); + x = vsetq_lane_u8(x6, x, 5); + x = vsetq_lane_u8(x7, x, 6); + x = vsetq_lane_u8(x8, x, 7); + x = vsetq_lane_u8(x9, x, 8); + x = vsetq_lane_u8(x10, x, 9); + x = vsetq_lane_u8(x11, x, 10); + x = vsetq_lane_u8(x12, x, 11); + x = vsetq_lane_u8(x13, x, 12); + x = vsetq_lane_u8(x14, x, 13); + x = vsetq_lane_u8(x15, x, 14); + x = vsetq_lane_u8(x16, x, 15); + return x; +} + +simdjson_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, + uint8_t x5, uint8_t x6, uint8_t x7, uint8_t x8) { + uint8x8_t x{}; + x = vset_lane_u8(x1, x, 0); + x = vset_lane_u8(x2, x, 1); + x = vset_lane_u8(x3, x, 2); + x = vset_lane_u8(x4, x, 3); + x = vset_lane_u8(x5, x, 4); + x = vset_lane_u8(x6, x, 5); + x = vset_lane_u8(x7, x, 6); + x = vset_lane_u8(x8, x, 7); + return x; +} + +// We have to do the same work for make_int8x16_t +simdjson_inline int8x16_t make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, + int8_t x5, int8_t x6, int8_t x7, int8_t x8, + int8_t x9, int8_t x10, int8_t x11, int8_t x12, + int8_t x13, int8_t x14, int8_t x15, int8_t x16) { + // Doing a load like so end ups generating worse code. 
+ // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, + // x9, x10,x11,x12,x13,x14,x15,x16}; + // return vld1q_s8(array); + int8x16_t x{}; + // incredibly, Visual Studio does not allow x[0] = x1 + x = vsetq_lane_s8(x1, x, 0); + x = vsetq_lane_s8(x2, x, 1); + x = vsetq_lane_s8(x3, x, 2); + x = vsetq_lane_s8(x4, x, 3); + x = vsetq_lane_s8(x5, x, 4); + x = vsetq_lane_s8(x6, x, 5); + x = vsetq_lane_s8(x7, x, 6); + x = vsetq_lane_s8(x8, x, 7); + x = vsetq_lane_s8(x9, x, 8); + x = vsetq_lane_s8(x10, x, 9); + x = vsetq_lane_s8(x11, x, 10); + x = vsetq_lane_s8(x12, x, 11); + x = vsetq_lane_s8(x13, x, 12); + x = vsetq_lane_s8(x14, x, 13); + x = vsetq_lane_s8(x15, x, 14); + x = vsetq_lane_s8(x16, x, 15); + return x; +} + +// End of private section with Visual Studio workaround +} // namespace +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO + + + template + struct simd8; + + // + // Base class of simd8 and simd8, both of which use uint8x16_t internally. + // + template> + struct base_u8 { + uint8x16_t value; + static const int SIZE = sizeof(value); + + // Conversion from/to SIMD register + simdjson_inline base_u8(const uint8x16_t _value) : value(_value) {} + simdjson_inline operator const uint8x16_t&() const { return this->value; } + simdjson_inline operator uint8x16_t&() { return this->value; } + + // Bit operations + simdjson_inline simd8 operator|(const simd8 other) const { return vorrq_u8(*this, other); } + simdjson_inline simd8 operator&(const simd8 other) const { return vandq_u8(*this, other); } + simdjson_inline simd8 operator^(const simd8 other) const { return veorq_u8(*this, other); } + simdjson_inline simd8 bit_andnot(const simd8 other) const { return vbicq_u8(*this, other); } + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + simdjson_inline simd8& operator|=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline simd8& operator&=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline simd8& operator^=(const simd8 other) { auto this_cast = static_cast*>(this); *this_cast = *this_cast ^ other; return *this_cast; } + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return vceqq_u8(lhs, rhs); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_u8(prev_chunk, *this, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base_u8 { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + static simdjson_inline simd8 splat(bool _value) { return vmovq_n_u8(uint8_t(-(!!_value))); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // False constructor + simdjson_inline simd8() : simd8(vdupq_n_u8(0)) {} + // Splat constructor + simdjson_inline simd8(bool _value) : simd8(splat(_value)) {} + + // We return uint32_t instead of uint16_t because that seems to be more efficient for most + // purposes (cutting it down to uint16_t costs performance in some compilers). 
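+    // Worked example: if lanes 0 and 3 of the mask are 0xFF and every other lane is 0x00,
+    // the AND with bit_mask keeps 0x01 and 0x08, the pairwise adds fold the sixteen lanes
+    // into one 16-bit value, and to_bitmask() returns 0b1001 (9).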
+ simdjson_inline uint32_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); +#else + const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}; +#endif + auto minput = *this & bit_mask; + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); + } + simdjson_inline bool any() const { return vmaxvq_u8(*this) != 0; } + }; + + // Unsigned bytes + template<> + struct simd8: base_u8 { + static simdjson_inline uint8x16_t splat(uint8_t _value) { return vmovq_n_u8(_value); } + static simdjson_inline uint8x16_t zero() { return vdupq_n_u8(0); } + static simdjson_inline uint8x16_t load(const uint8_t* values) { return vld1q_u8(values); } + + simdjson_inline simd8(const uint8x16_t _value) : base_u8(_value) {} + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[16]) : simd8(load(values)) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(make_uint8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(uint8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(uint8_t dst[16]) const { return vst1q_u8(dst, *this); } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return vqaddq_u8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return vqsubq_u8(*this, other); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_u8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_u8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-specific operations + simdjson_inline uint8_t max_val() const { return vmaxvq_u8(*this); } + simdjson_inline uint8_t min_val() const { return vminvq_u8(*this); } + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_u8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return 
vminq_u8(*this, other); } + simdjson_inline simd8 operator<=(const simd8 other) const { return vcleq_u8(*this, other); } + simdjson_inline simd8 operator>=(const simd8 other) const { return vcgeq_u8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_u8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_u8(*this, other); } + // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 gt_bits(const simd8 other) const { return simd8(*this > other); } + // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true = nonzero. For ARM, returns all 1's. + simdjson_inline simd8 lt_bits(const simd8 other) const { return simd8(*this < other); } + + // Bit-specific operations + simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } + simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } + template + simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } + template + simdjson_inline simd8 shl() const { return vshlq_n_u8(*this, N); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint16_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + uint64x2_t shufmask64 = {thintable_epi8[mask1], thintable_epi8[mask2]}; + uint8x16_t shufmask = vreinterpretq_u8_u64(shufmask64); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x16_t inc = make_uint8x16_t(0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x16_t inc = {0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + shufmask = vaddq_u8(shufmask, inc); + // this is the version "nearly pruned" + uint8x16_t pruned = vqtbl1q_u8(*this, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + uint8x16_t compactmask = vld1q_u8(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + uint8x16_t answer = vqtbl1q_u8(pruned, compactmask); + vst1q_u8(reinterpret_cast(output), answer); + } + + // Copies all bytes corresponding to a 0 in the low half of the mask (interpreted as a + // bitset) to output1, then those corresponding to a 0 in the high half to output2. + template + simdjson_inline void compress_halves(uint16_t mask, L *output1, L *output2) const { + using internal::thintable_epi8; + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + uint8x8_t compactmask1 = vcreate_u8(thintable_epi8[mask1]); + uint8x8_t compactmask2 = vcreate_u8(thintable_epi8[mask2]); + // we increment by 0x08 the second half of the mask +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + uint8x8_t inc = make_uint8x8_t(0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08); +#else + uint8x8_t inc = {0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08}; +#endif + compactmask2 = vadd_u8(compactmask2, inc); + // store each result (with the second store possibly overlapping the first) + vst1_u8((uint8_t*)output1, vqtbl1_u8(*this, compactmask1)); + vst1_u8((uint8_t*)output2, vqtbl1_u8(*this, compactmask2)); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_u8(*this, simd8(original)); + } + }; + + // Signed bytes + template<> + struct simd8 { + int8x16_t value; + + static simdjson_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); } + static simdjson_inline simd8 zero() { return vdupq_n_s8(0); } + static simdjson_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); } + + // Conversion from/to SIMD register + simdjson_inline simd8(const int8x16_t _value) : value{_value} {} + simdjson_inline operator const int8x16_t&() const { return this->value; } + simdjson_inline operator int8x16_t&() { return this->value; } + + // Zero constructor + simdjson_inline simd8() : simd8(zero()) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(make_int8x16_t( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} +#else + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t 
v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(int8x16_t{ + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + }) {} +#endif + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Store to array + simdjson_inline void store(int8_t dst[16]) const { return vst1q_s8(dst, *this); } + + // Explicit conversion to/from unsigned + // + // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same type. + // In theory, we could check this occurrence with std::same_as and std::enabled_if but it is C++14 + // and relatively ugly and hard to read. +#ifndef SIMDJSON_REGULAR_VISUAL_STUDIO + simdjson_inline explicit simd8(const uint8x16_t other): simd8(vreinterpretq_s8_u8(other)) {} +#endif + simdjson_inline explicit operator simd8() const { return vreinterpretq_u8_s8(this->value); } + + // Math + simdjson_inline simd8 operator+(const simd8 other) const { return vaddq_s8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return vsubq_s8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *this; } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *this; } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return vmaxq_s8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return vminq_s8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return vcgtq_s8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return vcltq_s8(*this, other); } + simdjson_inline simd8 operator==(const simd8 other) const { return vceqq_s8(*this, other); } + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return vextq_s8(prev_chunk, *this, 16 - N); + } + + // Perform a lookup assuming no value is larger than 16 + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return lookup_table.apply_lookup_16_to(*this); + } + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + + template + simdjson_inline simd8 apply_lookup_16_to(const simd8 original) { + return vqtbl1q_s8(*this, simd8(original)); + } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "ARM kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} 
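+  /* Editor-only usage sketch (added for review; not part of the upstream simdjson
+     amalgamation): the to_bitmask() routines in simd8<bool> above and in this
+     struct pack 0x00/0xFF comparison lanes into an ordinary integer bitmask by
+     AND-ing each lane with the repeated constant {0x01,0x02,...,0x80} and then
+     folding with pairwise adds (vpaddq_u8). A minimal standalone version of that
+     idea, assuming an AArch64 NEON target and a non-MSVC compiler (MSVC would need
+     the make_uint8x16_t workaround shown earlier for the constant):
+
+       #include <arm_neon.h>
+       #include <cstdint>
+
+       // Pack 16 lanes of 0x00/0xFF (e.g. the result of vceqq_u8) into a 16-bit
+       // mask, lane i -> bit i.
+       inline uint16_t pack_lanes(uint8x16_t lanes) {
+         const uint8x16_t bit_mask = {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
+                                      0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80};
+         uint8x16_t masked = vandq_u8(lanes, bit_mask); // keep one distinct bit per true lane
+         uint8x16_t tmp = vpaddq_u8(masked, masked);    // fold 16 lanes -> 8 partial sums
+         tmp = vpaddq_u8(tmp, tmp);                     // 8 -> 4
+         tmp = vpaddq_u8(tmp, tmp);                     // 4 -> 2 (low byte, high byte)
+         return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
+       }
+  */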
+ simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint64_t popcounts = vget_lane_u64(vreinterpret_u64_u8(vcnt_u8(vcreate_u8(~mask))), 0); + // compute the prefix sum of the popcounts of each byte + uint64_t offsets = popcounts * 0x0101010101010101; + this->chunks[0].compress_halves(uint16_t(mask), output, &output[popcounts & 0xFF]); + this->chunks[1].compress_halves(uint16_t(mask >> 16), &output[(offsets >> 8) & 0xFF], &output[(offsets >> 16) & 0xFF]); + this->chunks[2].compress_halves(uint16_t(mask >> 32), &output[(offsets >> 24) & 0xFF], &output[(offsets >> 32) & 0xFF]); + this->chunks[3].compress_halves(uint16_t(mask >> 48), &output[(offsets >> 40) & 0xFF], &output[(offsets >> 48) & 0xFF]); + return offsets >> 56; + } + + simdjson_inline uint64_t to_bitmask() const { +#ifdef SIMDJSON_REGULAR_VISUAL_STUDIO + const uint8x16_t bit_mask = make_uint8x16_t( + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + ); +#else + const uint8x16_t bit_mask = { + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, + 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80 + }; +#endif + // Add each of the elements next to each other, successively, to stuff each 8 byte mask into one. + uint8x16_t sum0 = vpaddq_u8(this->chunks[0] & bit_mask, this->chunks[1] & bit_mask); + uint8x16_t sum1 = vpaddq_u8(this->chunks[2] & bit_mask, this->chunks[3] & bit_mask); + sum0 = vpaddq_u8(sum0, sum1); + sum0 = vpaddq_u8(sum0, sum0); + return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_SIMD_H +/* end file simdjson/arm64/simd.h */ +/* including simdjson/arm64/stringparsing_defs.h: #include "simdjson/arm64/stringparsing_defs.h" */ +/* begin file simdjson/arm64/stringparsing_defs.h */ +#ifndef SIMDJSON_ARM64_STRINGPARSING_DEFS_H +#define SIMDJSON_ARM64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on ARM; therefore, we + // smash them together into a 64-byte mask and get the bitmask from there. + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_ARM64_STRINGPARSING_DEFS_H +/* end file simdjson/arm64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/arm64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for arm64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for arm64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for arm64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::arm64::number_type */ +using number_type = simdjson::arm64::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. 
These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. 
This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). 
+ * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + 
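+  /* Editor-only usage sketch (added for review; not part of the upstream simdjson
+     amalgamation): the object-iteration methods documented above are intended to be
+     driven in order: start_object()/started_object(), then for each field
+     has_next_field(), field_key(), field_value(), and either child()/get_... to
+     consume the value or skip_child() to discard it. A rough, hedged sketch of that
+     calling sequence under those assumptions (count_fields_sketch is a hypothetical
+     helper, not a library function; it relies on the SIMDJSON_TRY error-propagation
+     macro and abbreviates error handling to keep the shape visible):
+
+       simdjson_inline simdjson_result<size_t> count_fields_sketch(value_iterator &iter) noexcept {
+         size_t n = 0;
+         bool has_value;
+         SIMDJSON_TRY( iter.start_object().get(has_value) );     // false for an empty {}
+         while (has_value) {
+           n++;
+           SIMDJSON_TRY( iter.field_key().error() );             // current key (discarded here)
+           SIMDJSON_TRY( iter.field_value() );                   // step past the ':'
+           SIMDJSON_TRY( iter.skip_child() );                    // discard the field's value
+           SIMDJSON_TRY( iter.has_next_field().get(has_value) ); // ',' -> true, '}' -> false
+         }
+         return n;
+       }
+  */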
simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/value.h for arm64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator arm64::ondemand::array() noexcept(false); + simdjson_inline operator arm64::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
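+   *
+   * Because the lookup returns a simdjson_result, it can also be used without exceptions by
+   * retrieving the error code explicitly. A minimal sketch (the JSON text and key are only
+   * examples):
+   *
+   * ```c++
+   * ondemand::parser parser;
+   * auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded;
+   * auto doc = parser.iterate(json);
+   * double z;
+   * auto error = doc.find_field("z").get(z);
+   * if (error) { std::cerr << error << std::endl; } // e.g. NO_SUCH_FIELD or INCORRECT_TYPE
+   * ```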
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
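+   *
+   * As the note above suggests, a terse sketch of the usual pattern (the key name is only an
+   * example; `doc` stands for a document parsed elsewhere):
+   *
+   * ```c++
+   * double d;
+   * auto error = doc["pi"].get(d);
+   * if (error == INCORRECT_TYPE) {
+   *   // the field exists but is not a number; only now is type() worth consulting
+   * }
+   * ```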
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for arm64 */ +/* including simdjson/generic/ondemand/logger.h for arm64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator.h for arm64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator.h for arm64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/json_type.h for arm64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. 
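+   *
+   * For illustration, a sketch of dispatching on the detected type (assuming exceptions are
+   * enabled; `doc` and the key "n" are only examples):
+   *
+   * ```c++
+   * ondemand::number num = doc["n"].get_number();
+   * switch (num.get_number_type()) {
+   *   case ondemand::number_type::signed_integer:
+   *     std::cout << "int64: " << num.get_int64() << std::endl; break;
+   *   case ondemand::number_type::unsigned_integer:
+   *     std::cout << "uint64: " << num.get_uint64() << std::endl; break;
+   *   case ondemand::number_type::floating_point_number:
+   *     std::cout << "double: " << num.get_double() << std::endl; break;
+   * }
+   * ```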
+ */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
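+ *
+ * A one-line sketch (assuming exceptions are enabled and a document `doc` parsed elsewhere):
+ *
+ * ```c++
+ * std::cout << doc.type() << std::endl; // streams the type name, e.g. "array"
+ * ```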
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for arm64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for arm64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. 
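+   *
+   * For illustration, a typical way a raw_json_string arises and is compared (assuming exceptions
+   * are enabled; the JSON text and key are only examples): a key obtained from a field can be
+   * checked directly, without unescaping it first.
+   *
+   * ```c++
+   * ondemand::parser parser;
+   * auto json = R"( { "name": "simdjson" } )"_padded;
+   * auto doc = parser.iterate(json);
+   * for (ondemand::field field : doc.get_object()) {
+   *   if (field.key() == "name") {
+   *     std::string_view v = field.value().get_string(); // "simdjson"
+   *   }
+   * }
+   * ```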
+ */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. 
+ * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for arm64 */ +/* including simdjson/generic/ondemand/parser.h for arm64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace 
simdjson { +namespace arm64 { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. 
+ * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. 
+ * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may parse no more than one JSON document at a time. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you are + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. You should not rely on these errors to always be the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). 
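+ *
+ * A possible error-code-style pattern (illustrative sketch only, not part of the
+ * upstream documentation):
+ *
+ *   ondemand::parser parser;
+ *   ondemand::document_stream stream;
+ *   auto error = parser.iterate_many(json).get(stream);
+ *   if (error) { /* handle the error */ }
+ *   for (auto doc : stream) {
+ *     double v;
+ *     if (doc["foo"].get(v) == SUCCESS) { /* use v */ }
+ *   }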
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. 
If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
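+ *
+ * A rough usage sketch (illustrative only; most callers should prefer get_string()
+ * or unescaped_key() instead of calling this directly). It assumes the original JSON
+ * input is named `json` and that `rjs` is a raw_json_string obtained from the document:
+ *
+ *   std::unique_ptr<uint8_t[]> scratch(new uint8_t[json.size() + SIMDJSON_PADDING]);
+ *   uint8_t *dst = scratch.get();
+ *   auto sv = parser.unescape_wobbly(rjs, dst); // result holding a string_view into scratch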
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for arm64 */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for arm64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for arm64 */ +/* including simdjson/generic/ondemand/array_iterator.h for arm64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. 
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/document.h for arm64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. 
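+ *
+ * A minimal, illustrative sketch (not part of the upstream documentation; assumes a
+ * padded JSON array in `json`):
+ *
+ *   ondemand::parser parser;
+ *   auto doc = parser.iterate(json);
+ *   for (auto element : doc.get_array()) {
+ *     double d;
+ *     if (element.get_double().get(d) == SUCCESS) { /* use d */ }
+ *   }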
+ * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. 
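+ *
+ * Illustrative sketch of error-code-style extraction with the getters above (not from
+ * the upstream documentation; assumes the document holds a single JSON scalar):
+ *
+ *   bool flag;
+ *   auto error = doc.get_bool().get(flag);
+ *   if (error) { /* e.g. INCORRECT_TYPE */ }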
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if the document is a JSON array or object. + * @returns SCALAR_DOCUMENT_AS_VALUE error if the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns An unsigned 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. 
+ * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
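+ *
+ * An illustrative sketch of unordered lookup (not part of the upstream documentation):
+ *
+ *   ondemand::parser parser;
+ *   auto doc = parser.iterate(json);
+ *   double z, x;
+ *   auto error = doc["z"].get(z);            // fields may be requested out of order
+ *   if (!error) { error = doc["x"].get(x); }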
+ * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater than or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value; the simdjson + * library will autodetect the type. 
Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
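+ *
+ * (Illustrative note, not from the upstream documentation: it mirrors most of the
+ * document API and forwards calls to the wrapped document; instances are typically
+ * obtained while iterating a document_stream produced by iterate_many.)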
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(arm64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { 
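+// Illustrative note (not part of the upstream header): the simdjson_result wrappers
+// declared in this namespace are typically consumed either by checking the error
+// code, e.g.
+//
+//   ondemand::document doc;
+//   auto error = parser.iterate(json).get(doc);
+//   if (error) { /* handle the error */ }
+//
+// or, when SIMDJSON_EXCEPTIONS is enabled, by using the result directly and letting
+// a simdjson_error exception report the failure.
+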
+ +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator arm64::ondemand::array() & noexcept(false); + simdjson_inline operator arm64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator arm64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator arm64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream.h for arm64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* 
begin file simdjson/generic/ondemand/document_stream.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. 
You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
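+   *
+   * A minimal sketch of how callers typically reach this constructor indirectly,
+   * through parser::iterate_many (the batch size below is only an illustrative value):
+   *
+   *   ondemand::parser parser;
+   *   auto json = R"([1,2,3] {"k":"v"})"_padded;
+   *   document_stream stream;
+   *   auto error = parser.iterate_many(json, 1024).get(stream);
+   *   if (!error) { for (auto doc : stream) { (void)doc; } }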
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for arm64 */ +/* including simdjson/generic/ondemand/field.h for arm64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
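+   *
+   * A minimal sketch of typical use while iterating an object (error-code style;
+   * the object `obj` is assumed to have been obtained from get_object()):
+   *
+   *   for (auto field : obj) {
+   *     std::string_view key;
+   *     if (field.unescaped_key(false).get(key)) { break; }   // read the key first
+   *     ondemand::value val;
+   *     if (field.value().get(val)) { break; }                // then read the value
+   *     // use key and val
+   *   }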
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for arm64 */ +/* including simdjson/generic/ondemand/object.h for arm64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
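+   *
+   * A brief illustrative sketch (error-code style, assuming a document `doc` obtained
+   * from parser.iterate): unordered lookup succeeds even when the requested key is not
+   * the next field in the document:
+   *
+   *   ondemand::object obj;
+   *   if (!doc.get_object().get(obj)) {
+   *     double x;
+   *     if (!obj.find_field_unordered("x").get(x)) {
+   *       // "x" was found even though other fields may precede it
+   *     }
+   *   }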
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator.h for arm64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public arm64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(arm64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for arm64 */ +/* including simdjson/generic/ondemand/serialization.h for arm64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace arm64 { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
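+ *
+ * A brief illustrative sketch (assuming exceptions are enabled and `parser` is an
+ * ondemand::parser):
+ *
+ *   auto doc = parser.iterate(R"({"a": [1, 2, 3]})"_padded);
+ *   std::cout << doc["a"].get_array();   // writes the array back out as JSON text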
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for arm64 */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for arm64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
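+//
+//   An illustrative sketch (assuming `doc` is an ondemand document) of a pattern that leaves
+//   the iterator in the Unfinished Business state: the user receives a nested array but
+//   abandons it early, so the next ++ on the outer iterator must skip the remaining children:
+//
+//     for (auto elem : doc.get_array()) {
+//       ondemand::array inner;
+//       if (!elem.get_array().get(inner)) {
+//         for (auto x : inner) { break; }   // abandoned after the first element
+//       }
+//     }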
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for arm64 */ +/* including 
simdjson/generic/ondemand/array_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::array_iterator &&value +) noexcept + : arm64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : arm64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document-inl.h for arm64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
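+ *
+ * A small illustrative sketch of the behavior described above (error-code style,
+ * assuming an ondemand::parser `parser`):
+ *
+ *   auto doc = parser.iterate("1.233 blabla"_padded);
+ *   double d;
+ *   auto error = doc.get_double().get(d);   // fails: trailing content after the number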
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { 
return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator arm64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for arm64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for arm64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
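+ // The wait below is paired with the notify_one() at the end of the worker loop in
+ // start_thread() and with the notify_all() in stop_thread(): either the worker clears
+ // has_work after finishing stage 1, or the destructor wakes us while shutting down.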
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
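+ // Note that start() loops past empty leading batches, so error == EMPTY at this point
+ // means the whole input contained no documents, not merely that the first block was empty.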
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
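+ // Stage 1 succeeded for this window (otherwise we would have continued above) and the
+ // iterator has been re-anchored, so the next document begins at the start of the batch.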
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
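+ // Kicking off the next batch here lets stage 1 of batch N+1 run on the worker thread
+ // while the caller iterates over the documents of batch N.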
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for arm64 */ +/* including simdjson/generic/ondemand/field-inl.h for arm64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if 
SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for arm64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool number::is_int64() const 
noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for arm64 */ +/* including simdjson/generic/ondemand/logger-inl.h for arm64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace arm64 { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
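+
+// A minimal sketch of how this logger is usually driven (details here are assumptions, not
+// taken from this file): LOG_ENABLED is a compile-time switch (commonly turned on by defining
+// SIMDJSON_VERBOSE_LOGGING before including simdjson), while the run-time threshold comes from
+// the SIMDJSON_LOG_LEVEL environment variable read in get_log_level_from_env() below, e.g.
+//
+//   #define SIMDJSON_VERBOSE_LOGGING 1     // assumed build flag, set before #include "simdjson.h"
+//   ...
+//   SIMDJSON_LOG_LEVEL=ERROR ./my_program  // keep only log_level::error lines at run time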
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object-inl.h for arm64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); 
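+ // Usage sketch (error-code style; "doc" is an already-parsed ondemand::document, assumed for
+ // illustration):
+ //   std::string_view name;
+ //   auto error = doc["user"]["name"].get_string().get(name);  // operator[] == find_field_unordered
+ // Unlike find_field(), the unordered lookup tolerates keys queried out of document order, at
+ // the cost of possibly rescanning the object.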
+} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
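+ // field_value() consumes the ':' separator and leaves the iterator on the value itself, so the
+ // skip_child() call below skips a value (not a key) and the depth bookkeeping described above
+ // stays consistent.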
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
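+ // Usage sketch (names assumed for illustration; "obj" is an ondemand::object):
+ //   size_t n = 0;
+ //   if (obj.count_fields().get(n) == SUCCESS) {
+ //     for (auto field : obj) { /* ... */ }  // still valid thanks to the reset below
+ //   }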
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for arm64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/parser-inl.h for arm64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline parser::parser(size_t 
max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
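+ // (Stage 1 scans the padded input and records the offset of every structural character into
+ // implementation->structural_indexes; the ondemand token_iterator later walks that index array
+ // instead of re-scanning the raw JSON.)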
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for arm64 */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for arm64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace arm64 { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
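+ // In practice this is what key comparison against a plain literal such as "id" relies on; if
+ // the target might contain escapes or unescaped quotes, prefer is_equal() below, which tracks
+ // escaping, or vet the target with is_free_from_unescaped_quote() (defined above) first.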
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for arm64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for arm64: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(arm64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace arm64::ondemand; + arm64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + arm64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + arm64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(arm64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(arm64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace arm64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + 
throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::arm64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::arm64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for arm64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const 
noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value-inl.h for arm64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for arm64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const 
value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + 
answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + arm64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + 
if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template 
simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(arm64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator arm64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator arm64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for arm64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for arm64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +#ifndef 
SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. 
It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
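+  /* editor note (editor-only, illustrative): find_field_raw() only scans forward, so the ordered
+     lookups built on it (object::find_field) expect keys to be requested in the order they appear
+     in the document; a key that was already passed over will simply not be found. Sketch, assuming
+     `obj` is an ondemand::object positioned on {"a":1,"b":2}:
+
+       uint64_t a = 0, b = 0;
+       auto e1 = obj.find_field("a").get(a);  // found: "a" is the next field
+       auto e2 = obj.find_field("b").get(b);  // found: "b" appears after "a"
+
+     For order-independent access, use find_field_unordered / operator[], implemented below. */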
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and checks that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result<bool> value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right after the ':' right before the value (that we need to skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. 
+ if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. 
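+  /* editor note (editor-only, illustrative): the two loops above give operator[] and
+     find_field_unordered their order-independent behaviour: the search runs forward from the
+     current field and, if the key is not found before the closing brace, wraps around to the
+     start of the object and stops once it comes back to search_start. Out-of-order queries
+     therefore work, at the cost of re-skipping fields. Sketch, assuming `obj` is an
+     ondemand::object positioned on {"a":1,"b":2}:
+
+       uint64_t b = 0, a = 0;
+       obj["b"].get(b);  // forward scan finds "b"
+       obj["a"].get(a);  // nothing follows "b"; the search wraps and still finds "a"
+  */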
+ return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
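+      /* editor note (editor-only): as the comments above note, check_root_object()/check_root_array()
+         only verify that the last structural token closes the root and, when the padding itself starts
+         with a closing bracket, that the document is balanced. They do not reject trailing garbage such
+         as `[1, 2] foo]` on their own; callers who care should confirm that the whole input was consumed,
+         e.g. (sketch) by calling at_end() on the document once processing is finished:
+           // ... read the values from the document ...
+           // if (!doc.at_end()) { ... treat the trailing content as an error ... }
+      */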
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
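+  // editor note (editor-only): the bound above follows from the source comment: up to 1074
+  // significant fractional digits plus 8 more bytes for a leading sign, "0.", and an exponent
+  // such as "e-308" gives 1074 + 8 = 1082 characters, so copy_to_buffer() is capped at 1083
+  // bytes (1082 characters plus the null terminator) and longer root numbers are rejected with
+  // "Root number more than 1082 characters".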
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
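+  // editor note (editor-only): the 20 + 1 sizing used by the root integer helpers matches the
+  // comments above: UINT64_MAX (18446744073709551615) is 20 digits and INT64_MIN
+  // (-9223372036854775808) is also 20 characters ('-' plus 19 digits), so 20 bytes plus a null
+  // terminator cover every in-range root integer; copy_to_buffer() rejects longer tokens before
+  // any parsing takes place.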
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
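+/* editor note (editor-only, illustrative): is_open() below compares the json_iterator's current
+   depth with this value's depth: while the iterator is at the same depth or deeper, the container
+   owned by this value_iterator has not been closed yet. find_field_raw() and
+   find_field_unordered_raw() above use it to detect that a previous search already consumed the
+   closing brace. At the user level this depth bookkeeping is what makes plain iteration work;
+   a minimal sketch (the document handle `doc` and the key "measurements" are assumptions):
+
+     double sum = 0;
+     for (auto item : doc["measurements"].get_array()) {
+       double v;
+       if (item.get_double().get(v) == simdjson::SUCCESS) { sum += v; }
+     }
+*/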
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
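+  // editor note (editor-only): start_position() indexes the opening '[' or '{'. Right after
+  // start_container() the cursor is one token past it (delta == 1, the first element or key);
+  // for an empty container, started_array()/started_object() also consume the closing bracket,
+  // leaving delta == 2. Both offsets therefore count as the legitimate "iterator start" here.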
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace arm64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(arm64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +/* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ +/* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ +/* begin file simdjson/arm64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/arm64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "arm64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/arm64/end.h */ + +#endif // SIMDJSON_ARM64_ONDEMAND_H +/* end file simdjson/arm64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(fallback) +/* including simdjson/fallback/ondemand.h: #include "simdjson/fallback/ondemand.h" */ +/* begin file simdjson/fallback/ondemand.h */ +#ifndef SIMDJSON_FALLBACK_ONDEMAND_H +#define SIMDJSON_FALLBACK_ONDEMAND_H + +/* including simdjson/fallback/begin.h: #include "simdjson/fallback/begin.h" */ +/* begin file simdjson/fallback/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "fallback" */ +#define SIMDJSON_IMPLEMENTATION fallback +/* including simdjson/fallback/base.h: #include "simdjson/fallback/base.h" */ +/* begin file simdjson/fallback/base.h */ +#ifndef SIMDJSON_FALLBACK_BASE_H +#define SIMDJSON_FALLBACK_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Fallback implementation (runs on any machine). + */ +namespace fallback { + +class implementation; + +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_FALLBACK_BASE_H +/* end file simdjson/fallback/base.h */ +/* including simdjson/fallback/bitmanipulation.h: #include "simdjson/fallback/bitmanipulation.h" */ +/* begin file simdjson/fallback/bitmanipulation.h */ +#ifndef SIMDJSON_FALLBACK_BITMANIPULATION_H +#define SIMDJSON_FALLBACK_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace { + +#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64) +static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) { + unsigned long x0 = (unsigned long)x, top, bottom; + _BitScanForward(&top, (unsigned long)(x >> 32)); + _BitScanForward(&bottom, x0); + *ret = x0 ? bottom : 32 + top; + return x != 0; +} +static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) { + unsigned long x1 = (unsigned long)(x >> 32), top, bottom; + _BitScanReverse(&top, x1); + _BitScanReverse(&bottom, (unsigned long)x); + *ret = x1 ? top + 32 : bottom; + return x != 0; +} +#endif + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#ifdef _MSC_VER + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
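+  // editor note (editor-only, illustrative): leading_zeroes() counts the zero bits above the
+  // most significant set bit of a 64-bit value, e.g. leading_zeroes(1) == 63 and
+  // leading_zeroes(1ULL << 63) == 0. For an input of 0 the MSVC branch returns 64 while
+  // __builtin_clzll(0) is undefined, hence the caveat above. On 32-bit MSVC targets the
+  // _BitScanReverse64 shim defined earlier stitches the result from two 32-bit scans.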
+  if (_BitScanReverse64(&leading_zero, input_num))
+    return (int)(63 - leading_zero);
+  else
+    return 64;
+#else
+  return __builtin_clzll(input_num);
+#endif// _MSC_VER
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_BITMANIPULATION_H
+/* end file simdjson/fallback/bitmanipulation.h */
+/* including simdjson/fallback/stringparsing_defs.h: #include "simdjson/fallback/stringparsing_defs.h" */
+/* begin file simdjson/fallback/stringparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+namespace fallback {
+namespace {
+
+// Holds backslashes and quotes locations.
+struct backslash_and_quote {
+public:
+  static constexpr uint32_t BYTES_PROCESSED = 1;
+  simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst);
+
+  simdjson_inline bool has_quote_first() { return c == '"'; }
+  simdjson_inline bool has_backslash() { return c == '\\'; }
+  simdjson_inline int quote_index() { return c == '"' ? 0 : 1; }
+  simdjson_inline int backslash_index() { return c == '\\' ? 0 : 1; }
+
+  uint8_t c;
+}; // struct backslash_and_quote
+
+simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) {
+  // store to dest unconditionally - we can overwrite the bits we don't like later
+  dst[0] = src[0];
+  return { src[0] };
+}
+
+} // unnamed namespace
+} // namespace fallback
+} // namespace simdjson
+
+#endif // SIMDJSON_FALLBACK_STRINGPARSING_DEFS_H
+/* end file simdjson/fallback/stringparsing_defs.h */
+/* including simdjson/fallback/numberparsing_defs.h: #include "simdjson/fallback/numberparsing_defs.h" */
+/* begin file simdjson/fallback/numberparsing_defs.h */
+#ifndef SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+#define SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring>
+
+#ifdef JSON_TEST_NUMBERS // for unit testing
+void found_invalid_number(const uint8_t *buf);
+void found_integer(int64_t result, const uint8_t *buf);
+void found_unsigned_integer(uint64_t result, const uint8_t *buf);
+void found_float(double result, const uint8_t *buf);
+#endif
+
+namespace simdjson {
+namespace fallback {
+namespace numberparsing {
+
+// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const char *chars) {
+  uint64_t val;
+  memcpy(&val, chars, sizeof(uint64_t));
+  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+}
+
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  return parse_eight_digits_unrolled(reinterpret_cast<const char *>(chars));
+}
+
+#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
+// this is a slow emulation routine for 32-bit
+//
+static simdjson_inline uint64_t
__emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} +static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = !!(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + !!(lo < bd); + return lo; +} +#endif + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace fallback +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_FALLBACK_NUMBERPARSING_DEFS_H +/* end file simdjson/fallback/numberparsing_defs.h */ +/* end file simdjson/fallback/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for fallback: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for fallback */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for fallback: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::fallback::number_type */ +using number_type = simdjson::fallback::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
+ */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. 
+ * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
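+   *
+   * As a minimal sketch of the intended call order (illustrative only), a scalar getter is
+   * expected to do something along these lines:
+   *
+   *   auto result = parse_bool(peek_non_root_scalar("bool")); // inspect the token without moving
+   *   if (result.error() == SUCCESS) { advance_non_root_scalar("bool"); } // consume it only once the type is confirmed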
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for fallback */ +/* including simdjson/generic/ondemand/value.h for fallback: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
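+   *
+   * For instance, a minimal sketch (illustrative; `val` is assumed to be an ondemand::value
+   * positioned on a number):
+   *
+   *   int64_t i;
+   *   auto error = val.get_int64().get(i); // yields INCORRECT_TYPE if the value is not an integer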
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
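+   *
+   * A short sketch of an out-of-order lookup (illustrative keys; exceptions-enabled build assumed):
+   *
+   * ```c++
+   * simdjson::ondemand::parser parser;
+   * auto doc = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
+   * double z = doc["z"];
+   * double x = doc["x"]; // still found: the lookup wraps around to the start of the object
+   * ```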
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator fallback::ondemand::array() noexcept(false); + simdjson_inline operator fallback::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for fallback */ +/* including simdjson/generic/ondemand/logger.h for fallback: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator.h for fallback: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator.h for fallback: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for fallback */ +/* including simdjson/generic/ondemand/json_type.h for fallback: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. 
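+ *
+ * A minimal sketch of reading a number whose kind is not known in advance,
+ * assuming `val` is an ondemand::value positioned on a JSON number (the names
+ * are illustrative only):
+ *
+ *   ondemand::number num = val.get_number();
+ *   switch (num.get_number_type()) {
+ *     case number_type::signed_integer:        { int64_t  i = num.get_int64();  break; }
+ *     case number_type::unsigned_integer:      { uint64_t u = num.get_uint64(); break; }
+ *     case number_type::floating_point_number: { double   d = num.get_double(); break; }
+ *   }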
+ */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeeds, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declarations is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template <typename W> + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template <typename W> + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template <typename W> + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leaves it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream. + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown).
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string.h for fallback: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). 
+ * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. 
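+ *
+ * For reference, a minimal sketch of how raw_json_string instances obtained from
+ * object fields are typically used (the object and key are illustrative only):
+ *
+ *   for (auto field : obj) {
+ *     raw_json_string k = field.key();        // escaped bytes, a view into the JSON
+ *     if (k == "name") {                      // byte-wise comparison, no unescaping
+ *       std::string_view v = field.value().get_string();
+ *     }
+ *   }
+ *
+ * When a ready-to-use string is wanted instead, field.unescaped_key() (or
+ * value.get_string()) avoids dealing with raw_json_string directly, as noted above.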
+ */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for fallback */ +/* including simdjson/generic/ondemand/parser.h for fallback: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. 
+ * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. 
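+ *
+ * For reference, a minimal end-to-end sketch of the iterate() overloads documented
+ * above (the JSON content and field name are illustrative only):
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"( {"answer": 42} )"_padded;     // padded_string provides SIMDJSON_PADDING
+ *   ondemand::document doc = parser.iterate(json);
+ *   uint64_t answer = doc["answer"].get_uint64();
+ *
+ * Note that `parser` and `json` must outlive `doc`, and `doc` must outlive any
+ * value, object or array drawn from it, per the lifetime rules above.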
+ */ + simdjson_warn_unused simdjson_result<json_iterator> iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails.
You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. 
You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
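+ *
+ * A minimal sketch of the buffer discipline described above, for either unescape()
+ * or unescape_wobbly() (buffer sizing and names are illustrative; `rjs` and `rjs2`
+ * are raw_json_string instances taken from keys or values, `json` is the input):
+ *
+ *   std::unique_ptr<uint8_t[]> buf(new uint8_t[json.size() + SIMDJSON_PADDING]);
+ *   uint8_t *dst = buf.get();
+ *   std::string_view first  = parser.unescape(rjs, dst);   // dst advances past the bytes written
+ *   std::string_view second = parser.unescape(rjs2, dst);  // later strings reuse the same buffer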
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for fallback */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for fallback: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for fallback */ +/* including simdjson/generic/ondemand/array_iterator.h for fallback: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. 
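+ *
+ * You will rarely construct one of these directly: array::begin() and array::end()
+ * hand them out, and a range-for loop manages the *, ++ discipline described above.
+ * Editorial sketch (hedged; `my_array` is a hypothetical ondemand::array):
+ *
+ *   for (auto element : my_array) {
+ *     // consume `element` exactly once per iteration
+ *   }
+ *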
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for fallback */ +/* including simdjson/generic/ondemand/document.h for fallback: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. 
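+ *
+ * Editorial sketch (hedged) of the declare-then-assign pattern this enables,
+ * assuming a parser and a padded_string named `json` already exist:
+ *
+ *   ondemand::document doc;
+ *   auto error = parser.iterate(json).get(doc);
+ *   if (error) { ... }   // e.g., report simdjson::error_message(error)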
+ */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. 
\\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. 
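+ *
+ * Editorial sketch (hedged; requires SIMDJSON_EXCEPTIONS and a document whose
+ * root is an unsigned integer):
+ *
+ *   simdjson::padded_string json = "42"_padded;
+ *   auto doc = parser.iterate(json);
+ *   uint64_t n = doc;   // throws simdjson_error on INCORRECT_TYPE
+ *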
+ * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
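+ *
+ * Editorial sketch (hedged; error handling shortened):
+ *
+ *   simdjson::padded_string json = R"([10, 20, 30])"_padded;
+ *   auto doc = parser.iterate(json);
+ *   int64_t second;
+ *   auto error = doc.at(1).get_int64().get(second);   // second == 20 on success
+ *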
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). 
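+ *
+ * Editorial sketch (hedged) of classifying a scalar document without fully parsing it:
+ *
+ *   ondemand::number_type t;
+ *   if (doc.get_number_type().get(t) == simdjson::SUCCESS
+ *       && t == ondemand::number_type::floating_point_number) {
+ *     // treat the document as a double
+ *   }
+ *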
+ * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. 
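+ *
+ * Editorial sketch (hedged): handy for error reporting, since the returned pointer
+ * refers into the original input buffer:
+ *
+ *   const char *where;
+ *   if (doc.current_location().get(where) == simdjson::SUCCESS) {
+ *     // `where` points into the input near where iteration currently stands
+ *   }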
+ */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
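+ *
+ * Editorial note (hedged): you normally do not create these yourself; they are
+ * handed out while iterating the result of parser::iterate_many (see
+ * document_stream further below), e.g.
+ *
+ *   for (auto doc : stream) {
+ *     // use doc much like a document; the stream retains ownership
+ *   }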
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(fallback::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + 
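+/*
+ * Editorial usage sketch (not part of the simdjson sources): exercising the document
+ * API declared above in the error-code style. The JSON payload, the "/pt/x" pointer
+ * and the function name are illustrative only.
+ *
+ *   bool read_x(simdjson::ondemand::parser &parser, simdjson::padded_string &json, double &x) {
+ *     simdjson::ondemand::document doc;
+ *     auto error = parser.iterate(json).get(doc);
+ *     if (error) { return false; }               // simdjson::error_message(error) describes it
+ *     error = doc.at_pointer("/pt/x").get_double().get(x);
+ *     return error == simdjson::SUCCESS;         // x == 1.5 for { "pt": { "x": 1.5 } }
+ *   }
+ */
+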
+namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator fallback::ondemand::array() & noexcept(false); + simdjson_inline operator fallback::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator fallback::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator fallback::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for fallback */ +/* including simdjson/generic/ondemand/document_stream.h for fallback: #include 
"simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
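+ *
+ * Editorial note (hedged): a document_stream is normally obtained from
+ * parser::iterate_many rather than from this constructor, e.g.
+ *
+ *   ondemand::document_stream stream;
+ *   auto error = parser.iterate_many(json).get(stream);
+ *   if (!error) {
+ *     for (auto doc : stream) { ... one document per iteration ... }
+ *   }
+ *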
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for fallback */ +/* including simdjson/generic/ondemand/field.h for fallback: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
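+ *
+ * Editorial sketch (hedged) of consuming key and value while iterating an object
+ * (`my_object` is a hypothetical ondemand::object):
+ *
+ *   for (auto field : my_object) {
+ *     std::string_view key;
+ *     if (field.unescaped_key(false).get(key)) { break; }
+ *     auto v = field.value();   // consume v exactly once
+ *   }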
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for fallback */ +/* including simdjson/generic/ondemand/object.h for fallback: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
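+   *
+   * A minimal sketch of out-of-order lookup (illustrative; it assumes exceptions are
+   * enabled and mirrors the find_field() example above):
+   *
+   * ```c++
+   * simdjson::ondemand::parser parser;
+   * auto doc = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
+   * simdjson::ondemand::object obj = doc.get_object();
+   * double y = obj["y"];
+   * double x = obj["x"]; // succeeds even though "x" precedes "y" in the document
+   * ```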
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator.h for fallback: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public fallback::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(fallback::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for fallback */ +/* including simdjson/generic/ondemand/serialization.h for fallback: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace fallback { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
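+ *
+ * A minimal usage sketch (illustrative; it assumes exceptions are enabled):
+ *
+ *   ondemand::parser parser;
+ *   auto doc = parser.iterate(R"( [1,2,3] )"_padded);
+ *   ondemand::array arr = doc.get_array();
+ *   std::cout << arr; // writes the raw JSON text of the array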
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::fallback::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for fallback */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for fallback: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for fallback */ +/* 
including simdjson/generic/ondemand/array_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::array_iterator &&value +) noexcept + : fallback::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : fallback::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/document-inl.h for fallback: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + 
return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
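+ *
+ * A minimal sketch of the intended behavior (illustrative, error-code style):
+ *
+ *   ondemand::parser parser;
+ *   auto doc = parser.iterate("1.233 blabla"_padded);
+ *   double d;
+ *   auto error = doc.get_double().get(d); // expected to fail: trailing content after the number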
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if 
(error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator fallback::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for fallback */ +/* including simdjson/generic/ondemand/document_stream-inl.h for fallback: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
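+  //
+  // A minimal sketch of the intended sequence (informal; it uses the names from
+  // document_stream further down in this file and is not executed here):
+  //
+  //   worker->run(this, &stage1_thread_parser, next_batch_start()); // hand the next batch to the thread
+  //   ... keep iterating the documents of the current batch ...
+  //   worker->finish();                          // block until stage 1 of that batch is done
+  //   std::swap(stage1_thread_parser, *parser);  // adopt the freshly indexed batch
+  //
+  // See document_stream::start_stage1_thread() and document_stream::load_from_stage1_thread() below.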
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
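+  //
+  // Illustration only (informal; it assumes the usual parser::iterate_many() entry point and
+  // is not part of this function):
+  //
+  //   ondemand::parser parser;
+  //   auto stream = parser.iterate_many(json);  // produces a document_stream
+  //   for (auto doc : stream) {                 // range-for uses begin()/end() defined here
+  //     // each `doc` wraps the current document together with its error code
+  //   }
+  //
+  // As explained in operator++ above, a document-level error ends the iteration.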
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for fallback */ +/* including simdjson/generic/ondemand/field-inl.h for fallback: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
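+  //
+  // Illustration (informal): key() hands back the raw, still-escaped key bytes and consumes
+  // nothing, whereas unescaped_key() above unescapes into the parser's string buffer and
+  // consumes the key, so it should be called at most once per field. With exceptions enabled,
+  // a typical loop looks roughly like:
+  //
+  //   for (auto field : object) {
+  //     std::string_view k = field.unescaped_key(false);
+  //     // ... use k before advancing to the next field ...
+  //   }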
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); 
+#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
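+      // For example (informal): skipping the value in  {"a":5,"b":6}  when the iterator sits on
+      // '5' with depth 2 and parent_depth 1: '5' has just been consumed by
+      // return_current_and_advance(), we fall into this default case, depth drops from 2 to 1
+      // below, 1 <= parent_depth, and skip_child() returns SUCCESS with the iterator now
+      // positioned on the ',' before "b".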
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_type-inl.h for fallback: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool 
number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for fallback */ +/* including simdjson/generic/ondemand/logger-inl.h for fallback: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace fallback { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
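+
+// Illustration (informal): when logging is compiled in (LOG_ENABLED), the runtime threshold is
+// read once from the SIMDJSON_LOG_LEVEL environment variable by get_log_level_from_env() below,
+// e.g.
+//
+//   SIMDJSON_LOG_LEVEL=ERROR ./my_program    # only log_level::error lines are printed
+//
+// Any other value, or an unset variable, falls back to log_level::info (log everything).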
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for fallback */ +/* including simdjson/generic/ondemand/object-inl.h for fallback: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return 
find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
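+  // A minimal caller-side sketch (assumed usage, not part of this header;
+  // 'doc' is a hypothetical ondemand::document):
+  //
+  //   ondemand::object obj;
+  //   if (doc.get_object().get(obj) == SUCCESS) {
+  //     size_t n;
+  //     if (obj.count_fields().get(n) == SUCCESS) {
+  //       // n is the number of fields; counting walks every field once,
+  //       // and the reset below lets the caller iterate the object afterwards.
+  //     }
+  //   }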
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for fallback */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" 
*/ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/parser-inl.h for fallback: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
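+  // Stage 1 indexes the structural characters of the entire input up front;
+  // the returned json_iterator then walks that structural index lazily.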
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for fallback */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for fallback: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation 
skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace fallback { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
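+  // No bounds are tracked here: the loop below relies entirely on 'target'
+  // being NUL terminated and on the raw JSON being quote terminated. Callers
+  // that cannot guarantee this should prefer is_equal(), which also handles
+  // unescaped quote characters in the target.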
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for fallback */ +/* including simdjson/generic/ondemand/serialization-inl.h for fallback: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(fallback::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace fallback::ondemand; + fallback::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + fallback::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + fallback::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(fallback::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(fallback::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace fallback { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) 
{ + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::fallback::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::fallback::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for fallback */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for fallback: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t 
token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for fallback */ +/* including simdjson/generic/ondemand/value-inl.h for fallback: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) 
noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result 
value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + fallback::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return 
first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline 
simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(fallback::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator fallback::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator fallback::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for fallback */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for fallback: #include 
"simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. 
+ raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. 
+ // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. 
+ } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
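+ // Editor note (not part of the upstream simdjson source): 20 digits suffice here because
+ // UINT64_MAX is 18446744073709551615, a 20-digit number; any longer token cannot fit a uint64_t.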
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
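+ // Editor note (illustrative, not part of the upstream simdjson source): for a non-empty
+ // array such as `[1]`, the iterator sits one token past the opening bracket, so delta == 1;
+ // for an empty container such as `[]`, the closing bracket has been consumed too, so delta == 2.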
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace fallback +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(fallback::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +/* end file simdjson/generic/ondemand/amalgamated.h for fallback */ +/* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ +/* begin file simdjson/fallback/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/fallback/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* undefining SIMDJSON_IMPLEMENTATION from "fallback" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/fallback/end.h */ + +#endif // SIMDJSON_FALLBACK_ONDEMAND_H +/* end file simdjson/fallback/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(haswell) +/* including simdjson/haswell/ondemand.h: #include "simdjson/haswell/ondemand.h" */ +/* begin file simdjson/haswell/ondemand.h */ +#ifndef SIMDJSON_HASWELL_ONDEMAND_H +#define SIMDJSON_HASWELL_ONDEMAND_H + +/* including simdjson/haswell/begin.h: #include "simdjson/haswell/begin.h" */ +/* begin file simdjson/haswell/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "haswell" */ +#define SIMDJSON_IMPLEMENTATION haswell + +/* including simdjson/haswell/base.h: #include "simdjson/haswell/base.h" */ +/* begin file simdjson/haswell/base.h */ +#ifndef SIMDJSON_HASWELL_BASE_H +#define SIMDJSON_HASWELL_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_HASWELL +namespace simdjson { +/** + * Implementation for Haswell (Intel AVX2). + */ +namespace haswell { + +class implementation; + +namespace { +namespace simd { +template struct simd8; +template struct simd8x64; +} // namespace simd +} // unnamed namespace + +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BASE_H +/* end file simdjson/haswell/base.h */ +/* including simdjson/haswell/intrinsics.h: #include "simdjson/haswell/intrinsics.h" */ +/* begin file simdjson/haswell/intrinsics.h */ +#ifndef SIMDJSON_HASWELL_INTRINSICS_H +#define SIMDJSON_HASWELL_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions. 
In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * (or ) before, so the headers + * are fooled. + */ +#include // for _blsr_u64 +#include // for __lzcnt64 +#include // for most things (AVX2, AVX512, _popcnt64) +#include +#include +#include +#include +#include // for _mm_clmulepi64_si128 +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m256i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for haswell kernel."); + +#endif // SIMDJSON_HASWELL_INTRINSICS_H +/* end file simdjson/haswell/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/haswell/bitmanipulation.h: #include "simdjson/haswell/bitmanipulation.h" */ +/* begin file simdjson/haswell/bitmanipulation.h */ +#ifndef SIMDJSON_HASWELL_BITMANIPULATION_H +#define SIMDJSON_HASWELL_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmask.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient? 
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMANIPULATION_H +/* end file simdjson/haswell/bitmanipulation.h */ +/* including simdjson/haswell/bitmask.h: #include "simdjson/haswell/bitmask.h" */ +/* begin file simdjson/haswell/bitmask.h */ +#ifndef SIMDJSON_HASWELL_BITMASK_H +#define SIMDJSON_HASWELL_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_BITMASK_H +/* end file simdjson/haswell/bitmask.h */ +/* including simdjson/haswell/numberparsing_defs.h: #include "simdjson/haswell/numberparsing_defs.h" */ +/* begin file simdjson/haswell/numberparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +#define SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
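+ // Editor note (illustrative scalar equivalent, not part of the upstream simdjson source):
+ //   uint32_t v = 0;
+ //   for (int i = 0; i < 8; i++) { v = 10 * v + uint32_t(chars[i] - '0'); }
+ // The SIMD code below produces the same eight-digit value using multiply-add instructions.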
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace haswell +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_HASWELL_NUMBERPARSING_DEFS_H +/* end file simdjson/haswell/numberparsing_defs.h */ +/* including simdjson/haswell/simd.h: #include "simdjson/haswell/simd.h" */ +/* begin file simdjson/haswell/simd.h */ +#ifndef SIMDJSON_HASWELL_SIMD_H +#define SIMDJSON_HASWELL_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
+ template + struct base { + __m256i value; + + // Zero constructor + simdjson_inline base() : value{__m256i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m256i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m256i&() const { return this->value; } + simdjson_inline operator __m256i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm256_or_si256(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm256_and_si256(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm256_xor_si256(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm256_andnot_si256(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m256i _value) : base>(_value) {} + + friend simdjson_really_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm256_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm256_alignr_epi8(*this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm256_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m256i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm256_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm256_testz_si256(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm256_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm256_setzero_si256(); } + static simdjson_inline simd8 load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m256i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[32]) const { return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); } + + // Addition/subtraction are the 
same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm256_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm256_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm256_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint32_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in four steps, first 8 bytes and then second 8 bytes... + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // second least significant 8 bits + uint8_t mask3 = uint8_t(mask >> 16); // ... + uint8_t mask4 = uint8_t(mask >> 24); // ... + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m256i shufmask = _mm256_set_epi64x(thintable_epi8[mask4], thintable_epi8[mask3], + thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask and so forth + shufmask = + _mm256_add_epi8(shufmask, _mm256_set_epi32(0x18181818, 0x18181818, + 0x10101010, 0x10101010, 0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m256i pruned = _mm256_shuffle_epi8(*this, shufmask); + // we still need to put the pieces back together. + // we compute the popcount of the first words: + int pop1 = BitsSetTable256mul2[mask1]; + int pop3 = BitsSetTable256mul2[mask3]; + + // then load the corresponding mask + // could be done with _mm256_loadu2_m128i but many standard libraries omit this intrinsic. + __m256i v256 = _mm256_castsi128_si256( + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8))); + __m256i compactmask = _mm256_insertf128_si256(v256, + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop3 * 8)), 1); + __m256i almostthere = _mm256_shuffle_epi8(pruned, compactmask); + // We just need to write out the result. + // This is the tricky bit that is hard to do + // if we want to return a SIMD register, since there + // is no single-instruction approach to recombine + // the two 128-bit lanes with an offset. 
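+      // Instead, we store the two 128-bit lanes separately: the low lane is written
+      // at 'output', and the high lane at output + 16 - count_ones(mask & 0xFFFF),
+      // i.e. right after the bytes kept from the low half, so the two compressed
+      // halves end up contiguous in memory.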
+ __m128i v128; + v128 = _mm256_castsi256_si128(almostthere); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output), v128); + v128 = _mm256_extractf128_si256(almostthere, 1); + _mm_storeu_si128( reinterpret_cast<__m128i *>(output + 16 - count_ones(mask & 0xFFFF)), v128); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31 + ) : simd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm256_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm256_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m256i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[32]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31 + ) : simd8(_mm256_setr_epi8( + 
v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm256_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm256_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm256_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm256_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm256_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm256_testz_si256(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm256_testz_si256(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 2, "Haswell kernel should use two registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+32)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + uint32_t mask1 = uint32_t(mask); + uint32_t mask2 = uint32_t(mask >> 32); + this->chunks[0].compress(mask1, output); + this->chunks[1].compress(mask2, output + 32 - count_ones(mask1)); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r_hi = this->chunks[1].to_bitmask(); + return r_lo | (r_hi << 32); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0] | this->chunks[1]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask, + this->chunks[1] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_SIMD_H +/* end file simdjson/haswell/simd.h */ +/* including simdjson/haswell/stringparsing_defs.h: #include "simdjson/haswell/stringparsing_defs.h" */ +/* begin file simdjson/haswell/stringparsing_defs.h */ +#ifndef SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +#define SIMDJSON_HASWELL_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
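+// Each set bit of bs_bits / quote_bits marks the position, within the 32 bytes
+// processed by copy_and_find(), of a '\\' or '"' byte respectively.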
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast((v == '\\').to_bitmask()), // bs_bits + static_cast((v == '"').to_bitmask()), // quote_bits + }; +} + +} // unnamed namespace +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_HASWELL_STRINGPARSING_DEFS_H +/* end file simdjson/haswell/stringparsing_defs.h */ +/* end file simdjson/haswell/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for haswell: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for haswell */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for haswell: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::haswell::number_type */ +using number_type = simdjson::haswell::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
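+   *
+   * In practice the type is inferred by peeking at the first byte of the token
+   * ('{', '[', '"', 't', 'f', 'n', a digit or '-'); the value itself is not consumed.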
+ */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. 
+ * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
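+   *
+   * A minimal sketch of the intended call pattern (illustrative only):
+   *
+   *   auto json = peek_non_root_scalar("bool");  // look at the token, do not move
+   *   auto result = parse_bool(json);            // decide whether we can handle it
+   *   if (!result.error()) { advance_non_root_scalar("bool"); }  // commit only then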
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for haswell */ +/* including simdjson/generic/ondemand/value.h for haswell: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
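+   *
+   * Illustrative sketch (the field name is hypothetical):
+   *
+   * ```c++
+   * auto settings = doc["settings"];
+   * size_t n = settings.count_fields();  // the object is rewound on success
+   * // ... it can then be iterated as if it had never been accessed ...
+   * ```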
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
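+   *
+   * A usage sketch (key names are illustrative):
+   *
+   * ```c++
+   * simdjson::ondemand::parser parser;
+   * auto json = R"( { "x": 1, "y": 2 } )"_padded;
+   * auto doc = parser.iterate(json);
+   * double y = doc["y"];  // unordered lookup
+   * double x = doc["x"];  // still found: the search wraps back to the start of the object
+   * ```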
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
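An illustrative sketch (not part of the patch) of document-level `at_pointer()` as documented above, including the empty-key case and the rewind-between-calls behaviour; it assumes `simdjson.h` and exceptions, and the JSON is a made-up example.

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "foo": { "a": [ 10, 20, 30 ] }, "": { "b": 1 } } )"_padded;
  ondemand::document doc = parser.iterate(json);

  int64_t second = doc.at_pointer("/foo/a/1");   // 20
  // at_pointer() on the document rewinds before each call, so a second lookup from the
  // root works, but values obtained from the first lookup must no longer be used.
  int64_t b = doc.at_pointer("//b");             // empty key "" then "b" -> 1
  std::cout << second << ' ' << b << '\n';

  auto missing = doc.at_pointer("/foo/nope");
  if (missing.error() == NO_SUCH_FIELD) { std::cout << "no such field\n"; }
}
```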
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator haswell::ondemand::array() noexcept(false); + simdjson_inline operator haswell::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
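A small usage sketch of the order-sensitive `find_field()` lookup documented above, reading the fields in document order (the safe way to use it); field names mirror the snippet in the comment and are otherwise arbitrary.

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded;
  ondemand::document doc = parser.iterate(json);
  ondemand::object obj = doc.get_object();

  // Order-sensitive lookup: read the fields in the order they appear.
  double x = obj.find_field("x");
  double y = obj.find_field("y");
  double z = obj.find_field("z");
  std::cout << x << ' ' << y << ' ' << z << '\n';

  // Asking for "x" again at this point would fail with NO_SUCH_FIELD, because
  // find_field() never goes back to fields that have already been passed.
}
```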
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
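By contrast, a sketch of the order-insensitive lookup (`operator[]`, i.e. `find_field_unordered()`) just described, including the error-code style for a key that may be absent; same assumptions as the previous sketches.

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded;
  ondemand::document doc = parser.iterate(json);
  ondemand::object obj = doc.get_object();

  // operator[] is find_field_unordered(): out-of-order lookups wrap around.
  double z = obj["z"];
  double x = obj["x"];   // still found, at the cost of an extra scan from the start

  // Error-code style for a key that may not be present:
  double w = 0;
  auto error = obj["w"].get(w);
  if (error == NO_SUCH_FIELD) { /* absent: handle as appropriate */ }
  std::cout << x << ' ' << z << ' ' << w << '\n';
}
```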
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for haswell */ +/* including simdjson/generic/ondemand/logger.h for haswell: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator.h for haswell: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
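The logger declared above is compiled out unless `SIMDJSON_VERBOSE_LOGGING` is set. A minimal sketch of enabling it for a debug build (the macro can equally be passed as `-DSIMDJSON_VERBOSE_LOGGING=1` on the compiler command line); once enabled, every iterator step is traced.

```c++
// Must be defined before the amalgamated header is included.
#define SIMDJSON_VERBOSE_LOGGING 1
#include "simdjson.h"

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "key": [1, 2, 3] } )"_padded;      // arbitrary input
  ondemand::document doc = parser.iterate(json);
  int64_t first = doc["key"].at(0);                    // each step is now logged
  return int(first) - 1;                               // use the value so it is not optimized away
}
```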
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator.h for haswell: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
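The `json_iterator` machinery above is marked private, but its `current_location()` and rewind facilities surface on the public `ondemand::document` type. Under that assumption, a sketch of reporting where an error occurred and then re-reading the document from the root:

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "a": 1, "b": "text" } )"_padded;   // arbitrary input
  ondemand::document doc = parser.iterate(json);

  // First pass: try to read "b" as a number, which fails with INCORRECT_TYPE.
  int64_t b = 0;
  auto error = doc["b"].get(b);
  if (error) {
    const char *where = nullptr;
    if (doc.current_location().get(where) == SUCCESS) {
      std::cerr << error << " near: " << where << '\n';   // points into the input buffer
    }
  }

  // Second pass: rewind the document and read the field again, this time as a string.
  doc.rewind();
  std::string_view b_text = doc["b"];
  std::cout << b_text << '\n';
}
```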
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for haswell */ +/* including simdjson/generic/ondemand/json_type.h for haswell: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. 
+ */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
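An illustrative sketch of the `ondemand::number` accessors and the lossy `as_double()` conversion documented above, plus the `operator<<` for `json_type`; the value 2^53 + 1 is chosen because it is exactly representable as an int64_t but not as a double.

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( [ 9007199254740993, 2.5 ] )"_padded;   // 2^53 + 1, then a double
  ondemand::document doc = parser.iterate(json);
  for (ondemand::value v : doc.get_array()) {
    ondemand::json_type t = v.type();
    std::cout << t << ": ";                              // prints "number"
    ondemand::number num = v.get_number();
    if (num.is_int64()) {
      // Exact as an integer; as_double() always succeeds but rounds here.
      std::cout << num.get_int64() << " (as_double() = " << num.as_double() << ")\n";
    } else {
      std::cout << num.get_double() << '\n';
    }
  }
}
```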
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string.h for haswell: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). 
+ * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. 
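A hedged sketch of comparing keys against a `raw_json_string` without unescaping, as described above; the literal `"model"` is a made-up key that contains no unescaped quote, so the cheap comparison is safe here.

```c++
#include "simdjson.h"
#include <iostream>

int main() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"( { "make": "Toyota", "model": "Camry" } )"_padded;   // arbitrary input
  ondemand::document doc = parser.iterate(json);
  for (ondemand::field f : doc.get_object()) {
    ondemand::raw_json_string key = f.key();      // raw, still escaped, no copy made
    if (key.unsafe_is_equal("model")) {           // byte-by-byte, no unescaping
      std::string_view model = f.value();         // unescaped into the parser's buffer
      std::cout << "model = " << model << '\n';
    }
  }
}
```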
+ */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for haswell */ +/* including simdjson/generic/ondemand/parser.h for haswell: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation 
skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. 
+ * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. 
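A minimal, error-code-style sketch of `iterate()` that respects the padding requirement documented above by copying a plain `std::string` into a `padded_string` first; the JSON content is an arbitrary example.

```c++
#include "simdjson.h"
#include <cstdlib>
#include <iostream>
#include <string>

int main() {
  using namespace simdjson;
  ondemand::parser parser;

  // A plain std::string does not carry SIMDJSON_PADDING, so copy it into a padded_string.
  std::string raw = R"( { "ok": true } )";
  padded_string json(raw);                        // allocates length + SIMDJSON_PADDING bytes

  ondemand::document doc;
  auto error = parser.iterate(json).get(doc);     // error-code style, no exceptions needed
  if (error) { std::cerr << error << '\n'; return EXIT_FAILURE; }

  bool ok = false;
  error = doc["ok"].get(ok);
  if (error) { std::cerr << error << '\n'; return EXIT_FAILURE; }
  std::cout << (ok ? "ok" : "not ok") << '\n';
  return EXIT_SUCCESS;
}
```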
+ */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may impact negatively the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults on false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. 
You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. 
You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
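+ *
+ * (Editor-added sketch, not from the upstream header; parser, json and rjs are hypothetical
+ * names, and the error-code style avoids exceptions.) One way to size the destination buffer
+ * as described above:
+ *
+ *   std::unique_ptr<uint8_t[]> buffer(new uint8_t[json.size() + SIMDJSON_PADDING]);
+ *   uint8_t *dst = buffer.get();
+ *   std::string_view unescaped;
+ *   auto error = parser.unescape_wobbly(rjs, dst).get(unescaped); // dst is advanced past the written bytes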
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for haswell */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for haswell: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for haswell */ +/* including simdjson/generic/ondemand/array_iterator.h for haswell: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. 
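+ *
+ * (Editor-added note, illustrative only.) You rarely construct this iterator yourself; it is
+ * produced by array::begin()/end(), typically through a range-for loop. A sketch with a
+ * made-up field name, assuming SIMDJSON_EXCEPTIONS:
+ *
+ *   for (auto element : doc["values"].get_array()) {
+ *     int64_t v = element.get_int64(); // converts, or throws simdjson_error
+ *   }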
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for haswell */ +/* including simdjson/generic/ondemand/document.h for haswell: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. 
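+ *
+ * (Editor-added illustration of that pattern, using the error-code API; the JSON literal is
+ * made up.)
+ *
+ *   ondemand::parser parser;
+ *   padded_string json = R"( { "a": 1 } )"_padded;
+ *   ondemand::document doc;
+ *   auto error = parser.iterate(json).get(doc);
+ *   if (error) { std::cerr << error << std::endl; }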
+ */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. 
\\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. 
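+ *
+ * (Editor-added, illustrative; the scalar JSON literal is made up.)
+ *
+ *   padded_string json = "42"_padded;
+ *   ondemand::document doc = parser.iterate(json);
+ *   uint64_t n = doc; // uses this conversion; throws simdjson_error if the document is not an unsigned integer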
+ * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
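+ *
+ * (Editor-added, illustrative; the array literal is made up and SIMDJSON_EXCEPTIONS is assumed.)
+ *
+ *   padded_string json = R"( [10, 20, 30] )"_padded;
+ *   ondemand::document doc = parser.iterate(json);
+ *   int64_t second = doc.at(1); // 20 -- call at() at most once per document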
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running
+ * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an
+ * OUT_OF_ORDER_ITERATION error is generated.
+ *
+ * You are expected to access keys only once. You should access the value corresponding to a key
+ * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string()
+ * is an error.
+ *
+ * @param key The key to look up.
+ * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */
+ simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept;
+ /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept;
+ /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result operator[](std::string_view key) & noexcept;
+ /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result operator[](const char *key) & noexcept;
+
+ /**
+ * Get the type of this JSON value. It does not validate or consume the value.
+ * E.g., you must still call "is_null()" to check that a value is null even if
+ * "type()" returns json_type::null.
+ *
+ * NOTE: If you're only expecting a value to be one type (a typical case), it's generally
+ * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just
+ * let it throw an exception).
+ *
+ * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+ */
+ simdjson_inline simdjson_result type() noexcept;
+
+ /**
+ * Checks whether the document is a scalar (string, number, null, Boolean).
+ * Returns false when it is an array or object.
+ *
+ * @returns true if the type is string, number, null, Boolean
+ * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+ */
+ simdjson_inline simdjson_result is_scalar() noexcept;
+
+ /**
+ * Checks whether the document is a negative number.
+ *
+ * @returns true if the number is negative.
+ */
+ simdjson_inline bool is_negative() noexcept;
+ /**
+ * Checks whether the document is an integer number. Note that
+ * this requires partially parsing the number string. If
+ * the value is determined to be an integer, it may still
+ * not parse properly as an integer in subsequent steps
+ * (e.g., it might overflow).
+ *
+ * @returns true if the number is an integer.
+ */
+ simdjson_inline simdjson_result is_integer() noexcept;
+ /**
+ * Determine the number type (integer or floating-point number) as quickly
+ * as possible. This function does not fully validate the input. It is
+ * useful when you only need to classify the numbers, without parsing them.
+ *
+ * If you are planning to retrieve the value or you need full validation,
+ * consider using the get_number() method instead: it will fully parse
+ * and validate the input, and give you access to the type:
+ * get_number().get_number_type().
+ * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. 
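+ *
+ * (Editor-added sketch; the malformed JSON literal and field name are made up, and the
+ * error-code API is used throughout.) It can be used to report where iteration stopped:
+ *
+ *   auto json = R"( { "double": 13.06, false, "integer": -343 } )"_padded;
+ *   ondemand::parser parser;
+ *   ondemand::document doc;
+ *   if (parser.iterate(json).get(doc) == SUCCESS) {
+ *     int64_t i;
+ *     if (doc["integer"].get_int64().get(i) != SUCCESS) { // fails: the document is malformed
+ *       const char *where;
+ *       if (doc.current_location().get(where) == SUCCESS) {
+ *         // 'where' points into the input buffer near the offending token
+ *       }
+ *     }
+ *   }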
+ */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
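+ *
+ * (Editor-added note, illustrative only.) You typically receive document_reference instances
+ * when looping over a document_stream produced by iterate_many(), mirroring the iterate_many()
+ * example earlier; the input below is made up and SIMDJSON_EXCEPTIONS is assumed:
+ *
+ *   auto json = R"( { "foo": 1 } { "foo": 2 } )"_padded;
+ *   ondemand::document_stream docs = parser.iterate_many(json);
+ *   for (auto doc : docs) {
+ *     std::cout << doc["foo"] << std::endl;
+ *   }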
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(haswell::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace 
simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator haswell::ondemand::array() & noexcept(false); + simdjson_inline operator haswell::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator haswell::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator haswell::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for haswell */ +/* including simdjson/generic/ondemand/document_stream.h for haswell: #include 
"simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace haswell { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
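+   * (Allocation and parsing of the first document happen lazily in start(),
+   * which is invoked by begin().)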
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
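+   * In other words, while the caller iterates the documents of the current
+   * batch, the worker thread runs stage 1 over the next batch using this
+   * parser; the two parsers are then swapped at the batch boundary.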
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for haswell */ +/* including simdjson/generic/ondemand/field.h for haswell: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
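+   *
+   * A minimal illustrative sketch (assuming exceptions are enabled and `obj`
+   * is an ondemand::object whose values are all numbers):
+   *
+   *   for (auto field : obj) {
+   *     std::string_view key = field.unescaped_key(); // consumes the key
+   *     double v = field.value();                      // read the value once
+   *   }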
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for haswell */ +/* including simdjson/generic/ondemand/object.h for haswell: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
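+   *
+   * A short usage sketch (illustrative names only, exceptions enabled):
+   *
+   *   auto obj = doc.get_object();
+   *   double z = obj["z"];                       // same as obj.find_field_unordered("z")
+   *   double x = obj.find_field_unordered("x");  // order of lookups does not matter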
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator.h for haswell: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): 
#ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public haswell::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(haswell::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for haswell */ +/* including simdjson/generic/ondemand/serialization.h for haswell: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace haswell { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
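+ *
+ * A small sketch (illustrative only; `arr` is an ondemand::array that has not
+ * been consumed yet):
+ *
+ *   std::cout << arr << std::endl; // writes the array's raw JSON text
+ *
+ * Streaming consumes the array, so either print it or convert it with
+ * to_json_string(), not both.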
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::haswell::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for haswell */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for haswell: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for haswell */ +/* including 
simdjson/generic/ondemand/array_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
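+  // Each of the three steps below can fail; on the first failure we stop and
+  // return without advancing to the next element.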
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::array_iterator &&value +) noexcept + : haswell::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : haswell::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/document-inl.h for haswell: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
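+ *
+ * For instance (an illustrative sketch):
+ *
+ *   ondemand::parser parser;
+ *   auto json = "1.233 blabla"_padded;
+ *   auto doc = parser.iterate(json);
+ *   double d;
+ *   auto error = doc.get_double().get(d); // fails: trailing content after the number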
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if 
(error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator haswell::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for haswell */ +/* including simdjson/generic/ondemand/document_stream-inl.h for haswell: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
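// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// The stage1_worker above only runs when SIMDJSON_THREADS_ENABLED is defined and the parser's
// `threaded` flag is set (the document_stream constructor below copies that flag). A minimal
// caller-side sketch of opting out of the worker thread, assuming a threads-enabled build and
// `#include "simdjson.h"` in the caller's translation unit:
#if 0 // sketch only, deliberately kept out of the build
static int no_worker_thread_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  parser.threaded = false;  // stage 1 of every batch now runs inline on the calling thread
  padded_string json = R"( {"x":1} {"x":2} {"x":3} )"_padded;
  ondemand::document_stream stream;
  if (parser.iterate_many(json).get(stream) != SUCCESS) { return -1; }
  int count = 0;
  for (auto doc : stream) { if (!doc.error()) { ++count; } }
  return count; // 3 documents, all parsed without the helper thread
}
#endif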
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
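// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// The begin()/operator++ logic above is what drives a range-for loop over a document_stream:
// iteration stops once EMPTY is reached, and any other error is surfaced through operator*.
// A minimal sketch, assuming `#include "simdjson.h"` in the caller's translation unit:
#if 0 // sketch only, deliberately kept out of the build
static long long iterate_many_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  padded_string json = R"( {"id":1} {"id":2} {"id":3} )"_padded;
  ondemand::document_stream stream;
  if (parser.iterate_many(json).get(stream) != SUCCESS) { return -1; }
  long long sum = 0;
  for (auto doc : stream) {            // doc wraps a document_reference plus any error
    int64_t id;
    if (doc["id"].get(id) == SUCCESS) { sum += id; }
  }
  return sum; // 6 for the input above
}
#endif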
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
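// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// The re-anchoring above is invisible to callers; their only lever is the batch size passed to
// iterate_many. Each batch must be able to hold the largest single document, otherwise the
// stream reports CAPACITY, and truncated_bytes() reveals how much input was never consumed.
// A sketch, assuming `#include "simdjson.h"` and a hypothetical one-megabyte batch size:
#if 0 // sketch only, deliberately kept out of the build
static size_t batch_size_sketch(const simdjson::padded_string &many_docs) {
  using namespace simdjson;
  ondemand::parser parser;
  ondemand::document_stream stream;
  if (parser.iterate_many(many_docs, 1000000).get(stream) != SUCCESS) { return 0; }
  size_t count = 0;
  for (auto doc : stream) { if (!doc.error()) { ++count; } }
  // A nonzero truncated_bytes() means some trailing input (for example, an oversized
  // document) was left unprocessed.
  return stream.truncated_bytes() == 0 ? count : 0;
}
#endif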
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
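// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// current_index() and source() above let callers recover where each streamed document sits in
// the original buffer and what its raw text is, which is handy for logging or for re-parsing a
// single document later. A sketch, assuming `#include "simdjson.h"`:
#if 0 // sketch only, deliberately kept out of the build
static void source_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  padded_string json = R"( [1,2] {"k":true} 42 )"_padded;
  ondemand::document_stream stream;
  if (parser.iterate_many(json).get(stream) != SUCCESS) { return; }
  for (auto it = stream.begin(); it != stream.end(); ++it) {
    if ((*it).error()) { break; }
    std::string_view raw = it.source();   // raw text of the current document
    size_t offset = it.current_index();   // byte offset of that document within `json`
    (void)raw; (void)offset;
  }
}
#endif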
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for haswell */ +/* including simdjson/generic/ondemand/field-inl.h for haswell: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if 
SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
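// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// skip_child() is what lets the On Demand front-end step over values the caller never reads:
// asking for a later field implicitly skips whatever remains of the values before it. A
// caller-side sketch of that effect, assuming `#include "simdjson.h"`:
#if 0 // sketch only, deliberately kept out of the build
static double skip_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  padded_string json = R"( {"ignored":[1,2,{"deep":true}], "wanted":3.5} )"_padded;
  auto doc = parser.iterate(json);
  double wanted = 0;
  // Looking up "wanted" causes the whole "ignored" array to be skipped internally.
  if (doc["wanted"].get(wanted) != SUCCESS) { return -1; }
  return wanted; // 3.5
}
#endif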
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
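// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// current_location() above is mainly a diagnostic aid: after a failure it points at (or near)
// the byte of the input where the iterator stopped. A sketch, assuming `#include "simdjson.h"`:
#if 0 // sketch only, deliberately kept out of the build
static void location_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  padded_string json = R"( {"a": 1, "b": } )"_padded;  // malformed on purpose
  auto doc = parser.iterate(json);
  int64_t b;
  if (doc["b"].get(b) != SUCCESS) {
    const char *where = nullptr;
    if (doc.current_location().get(where) == SUCCESS) {
      // `where` points into `json`, close to the spot where parsing gave up.
      (void)where;
    }
  }
}
#endif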
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_type-inl.h for haswell: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool 
number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for haswell */ +/* including simdjson/generic/ondemand/logger-inl.h for haswell: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace haswell { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
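// [Editorial note - illustrative sketch, not part of the upstream simdjson amalgamation.]
// ondemand::number above records which of uint64_t, int64_t or double the parser picked, and
// get_number_type() tells the caller which payload member is active. A dispatch sketch,
// assuming `#include "simdjson.h"`:
#if 0 // sketch only, deliberately kept out of the build
static double number_sketch() {
  using namespace simdjson;
  ondemand::parser parser;
  padded_string json = R"( 18446744073709551615 )"_padded; // fits only in uint64_t
  auto doc = parser.iterate(json);
  ondemand::number num;
  if (doc.get_number().get(num) != SUCCESS) { return -1; }
  switch (num.get_number_type()) {
    case ondemand::number_type::unsigned_integer:      return double(num.get_uint64());
    case ondemand::number_type::signed_integer:        return double(num.get_int64());
    case ondemand::number_type::floating_point_number: return num.get_double();
    default:                                           return -1;
  }
}
#endif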
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for haswell */ +/* including simdjson/generic/ondemand/object-inl.h for haswell: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return 
find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for haswell */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/parser-inl.h for haswell: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
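+  // Note: stage 1 builds the structural index (the positions of braces,
+  // brackets, commas, colons and value starts); the json_iterator returned
+  // below walks that index rather than re-scanning the raw bytes.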
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for haswell */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for haswell: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace haswell { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for haswell */ +/* including simdjson/generic/ondemand/serialization-inl.h for haswell: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(haswell::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace haswell::ondemand; + haswell::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + haswell::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + haswell::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(haswell::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(haswell::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace haswell { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return 
(out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::haswell::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::haswell::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for haswell */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t 
token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for haswell */ +/* including simdjson/generic/ondemand/value-inl.h for haswell: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + 
return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & 
noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + haswell::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} 
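+
+// Illustrative usage sketch (not part of the library): the wrappers in this
+// block forward to `first` only after error() has been checked, which is what
+// makes chained, exception-free lookups safe. Assuming `json` is a
+// padded_string, a caller might write:
+//
+//   ondemand::parser parser;
+//   auto doc = parser.iterate(json);
+//   uint64_t id;
+//   auto error = doc["id"].get_uint64().get(id); // earlier errors propagate here
+//   if (error) { std::cerr << error << std::endl; }
+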
+simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() 
noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(haswell::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator haswell::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator haswell::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for haswell */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for haswell: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file 
simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. 
+ raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and checks that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right after the ':' right before the value (that we need to skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order.
+ // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but did not access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}').
+ SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. 
+ } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace haswell +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(haswell::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result<haswell::ondemand::value_iterator>::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base<haswell::ondemand::value_iterator>(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +/* end file simdjson/generic/ondemand/amalgamated.h for haswell */ +/* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ +/* begin file simdjson/haswell/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/haswell/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_HASWELL +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "haswell" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/haswell/end.h */ + +#endif // SIMDJSON_HASWELL_ONDEMAND_H +/* end file simdjson/haswell/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(icelake) +/* including simdjson/icelake/ondemand.h: #include "simdjson/icelake/ondemand.h" */ +/* begin file simdjson/icelake/ondemand.h */ +#ifndef SIMDJSON_ICELAKE_ONDEMAND_H +#define SIMDJSON_ICELAKE_ONDEMAND_H + +/* including simdjson/icelake/begin.h: #include "simdjson/icelake/begin.h" */ +/* begin file simdjson/icelake/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "icelake" */ +#define SIMDJSON_IMPLEMENTATION icelake +/* including simdjson/icelake/base.h: #include "simdjson/icelake/base.h" */ +/* begin file simdjson/icelake/base.h */ +#ifndef SIMDJSON_ICELAKE_BASE_H +#define SIMDJSON_ICELAKE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_ICELAKE +namespace simdjson { +/** + * Implementation for Icelake (Intel AVX512). + */ +namespace icelake { + +class implementation; + +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BASE_H +/* end file simdjson/icelake/base.h */ +/* including simdjson/icelake/intrinsics.h: #include "simdjson/icelake/intrinsics.h" */ +/* begin file simdjson/icelake/intrinsics.h */ +#ifndef SIMDJSON_ICELAKE_INTRINSICS_H +#define SIMDJSON_ICELAKE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include <x86intrin.h> +#include <intrin.h> // visual studio or clang +#else +#include <x86intrin.h> // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + * e.g., if __AVX2__ is set... in turn, we normally set these + * macros by compiling against the corresponding architecture + * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole + * software with these advanced instructions.
In simdjson, we + * want to compile the whole program for a generic target, + * and only target our specific kernels. As a workaround, + * we directly include the needed headers. These headers would + * normally guard against such usage, but we carefully included + * <x86intrin.h> (or <intrin.h>) before, so the headers + * are fooled. + */ +#include <bmiintrin.h> // for _blsr_u64 +#include <lzcntintrin.h> // for __lzcnt64 +#include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64) +#include <smmintrin.h> +#include <tmmintrin.h> +#include <avxintrin.h> +#include <avx2intrin.h> +#include <wmmintrin.h> // for _mm_clmulepi64_si128 +// Important: we need the AVX-512 headers: +#include <avx512fintrin.h> +#include <avx512dqintrin.h> +#include <avx512cdintrin.h> +#include <avx512bwintrin.h> +#include <avx512vlintrin.h> +#include <avx512vbmiintrin.h> +#include <avx512vbmi2intrin.h> +// unfortunately, we may not get _blsr_u64, but, thankfully, clang +// has it as a macro. +#ifndef _blsr_u64 +// we roll our own +#define _blsr_u64(n) ((n - 1) & n) +#endif // _blsr_u64 +#endif // SIMDJSON_CLANG_VISUAL_STUDIO + +static_assert(sizeof(__m512i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for icelake"); + +#endif // SIMDJSON_ICELAKE_INTRINSICS_H +/* end file simdjson/icelake/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE +SIMDJSON_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,pclmul,lzcnt,popcnt") +#endif + +/* including simdjson/icelake/bitmanipulation.h: #include "simdjson/icelake/bitmanipulation.h" */ +/* begin file simdjson/icelake/bitmanipulation.h */ +#ifndef SIMDJSON_ICELAKE_BITMANIPULATION_H +#define SIMDJSON_ICELAKE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return (int)_tzcnt_u64(input_num); +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + //////// + // You might expect the next line to be equivalent to + // return (int)_tzcnt_u64(input_num); + // but the generated code differs and might be less efficient?
+ //////// + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return _blsr_u64(input_num); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { + return int(_lzcnt_u64(input_num)); +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMANIPULATION_H +/* end file simdjson/icelake/bitmanipulation.h */ +/* including simdjson/icelake/bitmask.h: #include "simdjson/icelake/bitmask.h" */ +/* begin file simdjson/icelake/bitmask.h */ +#ifndef SIMDJSON_ICELAKE_BITMASK_H +#define SIMDJSON_ICELAKE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processor supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_BITMASK_H +/* end file simdjson/icelake/bitmask.h */ +/* including simdjson/icelake/simd.h: #include "simdjson/icelake/simd.h" */ +/* begin file simdjson/icelake/simd.h */ +#ifndef SIMDJSON_ICELAKE_SIMD_H +#define SIMDJSON_ICELAKE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ == 8 +#define SIMDJSON_GCC8 1 +#endif // __GNUC__ == 8 +#endif // defined(__GNUC__) && !defined(__clang__) + +#if SIMDJSON_GCC8 +/** + * GCC 8 fails to provide _mm512_set_epi8. We roll our own. 
+ */ +inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9, uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14, uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19, uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24, uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29, uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34, uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39, uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44, uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49, uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54, uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59, uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) { + return _mm512_set_epi64(uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) + (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) + (uint64_t(a1) << 48) + (uint64_t(a0) << 56), + uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) + (uint64_t(a12) << 24) + (uint64_t(a11) << 32) + (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56), + uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) + (uint64_t(a20) << 24) + (uint64_t(a19) << 32) + (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56), + uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) + (uint64_t(a28) << 24) + (uint64_t(a27) << 32) + (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56), + uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) + (uint64_t(a36) << 24) + (uint64_t(a35) << 32) + (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56), + uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) + (uint64_t(a44) << 24) + (uint64_t(a43) << 32) + (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56), + uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) + (uint64_t(a52) << 24) + (uint64_t(a51) << 32) + (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56), + uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) + (uint64_t(a60) << 24) + (uint64_t(a59) << 32) + (uint64_t(a58) << 40) + (uint64_t(a57) << 48) + (uint64_t(a56) << 56)); +} +#endif // SIMDJSON_GCC8 + + + +namespace simdjson { +namespace icelake { +namespace { +namespace simd { + + // Forward-declared so they can be used by splat and friends. 
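+ // Note: the wrapper types below use the CRTP pattern: each concrete type derives from
+ // base<Child> so the bitwise operators can return the derived type (Child) rather than
+ // the base, and the compound assignments cast `this` back to Child.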
+ template + struct base { + __m512i value; + + // Zero constructor + simdjson_inline base() : value{__m512i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m512i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m512i&() const { return this->value; } + simdjson_inline operator __m512i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm512_or_si512(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm512_and_si512(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm512_xor_si512(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm512_andnot_si512(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + // Forward-declared so they can be used by splat and friends. + template + struct simd8; + + template> + struct base8: base> { + typedef uint32_t bitmask_t; + typedef uint64_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m512i _value) : base>(_value) {} + + friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { + return _mm512_cmpeq_epi8_mask(lhs, rhs); + } + + static const int SIZE = sizeof(base::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + // workaround for compilers unable to figure out that 16 - N is a constant (GCC 8) + constexpr int shift = 16 - N; + return _mm512_alignr_epi8(*this, _mm512_permutex2var_epi64(prev_chunk, _mm512_set_epi64(13, 12, 11, 10, 9, 8, 7, 6), *this), shift); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm512_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m512i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + simdjson_inline bool any() const { return !!_mm512_test_epi8_mask (*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm512_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm512_setzero_si512(); } + static simdjson_inline simd8 load(const T values[64]) { + return _mm512_loadu_si512(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline 
base8_numeric(const __m512i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[64]) const { return _mm512_storeu_si512(reinterpret_cast<__m512i *>(dst), *this); } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm512_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm512_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm512_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint64_t mask, L * output) const { + _mm512_mask_compressstoreu_epi8 (output,~mask,*this); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15, + int8_t v16, int8_t v17, int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23, + int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29, int8_t v30, int8_t v31, + int8_t v32, int8_t v33, int8_t v34, int8_t v35, int8_t v36, int8_t v37, int8_t v38, int8_t v39, + int8_t v40, int8_t v41, int8_t v42, int8_t v43, int8_t v44, int8_t v45, int8_t v46, int8_t v47, + int8_t v48, int8_t v49, int8_t v50, int8_t v51, int8_t v52, int8_t v53, int8_t v54, int8_t v55, + int8_t v56, int8_t v57, int8_t v58, int8_t v59, int8_t v60, int8_t v61, int8_t v62, int8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, 
v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epi8(*this, other); } + + simdjson_inline simd8 operator>(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(*this, other),_mm512_set1_epi8(uint8_t(0x80))); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm512_maskz_abs_epi8(_mm512_cmpgt_epi8_mask(other, *this),_mm512_set1_epi8(uint8_t(0x80))); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m512i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t values[64]) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15, + uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20, uint8_t v21, uint8_t v22, uint8_t v23, + uint8_t v24, uint8_t v25, uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30, uint8_t v31, + uint8_t v32, uint8_t v33, uint8_t v34, uint8_t v35, uint8_t v36, uint8_t v37, uint8_t v38, uint8_t v39, + uint8_t v40, uint8_t v41, uint8_t v42, uint8_t v43, uint8_t v44, uint8_t v45, uint8_t v46, uint8_t v47, + uint8_t v48, uint8_t v49, uint8_t v50, uint8_t v51, uint8_t v52, uint8_t v53, uint8_t v54, uint8_t v55, + uint8_t v56, uint8_t v57, uint8_t v58, uint8_t v59, uint8_t v60, uint8_t v61, uint8_t v62, uint8_t v63 + ) : simd8(_mm512_set_epi8( + v63, v62, v61, v60, v59, v58, v57, v56, + v55, v54, v53, v52, v51, v50, v49, v48, + v47, v46, v45, v44, v43, v42, v41, v40, + v39, v38, v37, v36, v35, v34, v33, v32, + v31, v30, v29, v28, v27, v26, v25, v24, + v23, v22, v21, v20, v19, v18, v17, v16, + v15, v14, v13, v12, v11, v10, v9, v8, + v7, v6, v5, v4, v3, v2, v1, v0 + )) {} + + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, 
v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm512_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm512_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm512_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm512_min_epu8(other, *this); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline uint64_t operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline uint64_t operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->lt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return _mm512_mask_blend_epi8(*this == uint8_t(0), _mm512_set1_epi8(0), _mm512_set1_epi8(-1)); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + + simdjson_inline bool is_ascii() const { return _mm512_movepi8_mask(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { + return !_mm512_test_epi8_mask(*this, *this); + } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return !_mm512_test_epi8_mask(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm512_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm512_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline uint64_t get_bit() const { return _mm512_movepi8_mask(_mm512_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 1, "Icelake kernel should use one register per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1) : chunks{chunk0, chunk1} {} + simdjson_inline simd8x64(const simd8 chunk0) : chunks{chunk0} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr)} {} + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(mask, output); + return 64 - count_ones(mask); + } + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + } + + simdjson_inline simd8 reduce_or() const { + return this->chunks[0]; + } + + simdjson_inline simd8x64 bit_or(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] | mask + ); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] == mask; + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return this->chunks[0] == other.chunks[0]; + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return this->chunks[0] <= mask; + } + }; // struct simd8x64 + +} // namespace simd + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_SIMD_H +/* end file simdjson/icelake/simd.h */ +/* including simdjson/icelake/stringparsing_defs.h: #include "simdjson/icelake/stringparsing_defs.h" */ +/* begin file simdjson/icelake/stringparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +#define SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/simd.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
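+// For example (illustrative): if byte 5 of the 64-byte block is '\' and byte 9 is '"',
+// then bs_bits has bit 5 set and quote_bits has bit 9 set; has_quote_first() then
+// reports whether the first quote appears before the first backslash.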
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return ((quote_bits - 1) & bs_bits) != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint64_t bs_bits; + uint64_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 15 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + // store to dest unconditionally - we can overwrite the bits we don't like later + v.store(dst); + return { + static_cast(v == '\\'), // bs_bits + static_cast(v == '"'), // quote_bits + }; +} + +} // unnamed namespace +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_ICELAKE_STRINGPARSING_DEFS_H +/* end file simdjson/icelake/stringparsing_defs.h */ +/* including simdjson/icelake/numberparsing_defs.h: #include "simdjson/icelake/numberparsing_defs.h" */ +/* begin file simdjson/icelake/numberparsing_defs.h */ +#ifndef SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +#define SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/icelake/intrinsics.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace numberparsing { + +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
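+  // For example, given the ASCII bytes "12345678" this returns 12345678u; only the
+  // first eight digits of the 16-byte load contribute to the final 32-bit result.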
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace icelake +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_ICELAKE_NUMBERPARSING_DEFS_H +/* end file simdjson/icelake/numberparsing_defs.h */ +/* end file simdjson/icelake/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for icelake: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for icelake */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for icelake: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::icelake::number_type */ +using number_type = simdjson::icelake::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
+ */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
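+ * For example (illustrative): find_field("a") matches the key in { "a": 1 } but will
+ * not match { "\u0061": 1 }, even though both spellings denote the same key.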
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. 
+ * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
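+ * A minimal sketch of the intended pattern (simplified from the get_bool() path):
+ *
+ *   auto result = parse_bool(peek_non_root_scalar("bool"));
+ *   if (result.error() == SUCCESS) { advance_non_root_scalar("bool"); }
+ *   return result;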
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for icelake */ +/* including simdjson/generic/ondemand/value.h for icelake: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
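+ *
+ * A minimal error-code sketch (illustrative only; `doc` is assumed to be an ondemand
+ * document or value positioned on a JSON object; operator[] is an overload of this method):
+ *
+ *   double x;
+ *   if (!doc["x"].get(x)) {
+ *     // the key "x" was found and parsed as a double
+ *   }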
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator icelake::ondemand::array() noexcept(false); + simdjson_inline operator icelake::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
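+ *
+ * A minimal error-code sketch (illustrative only; it assumes an `obj` like the one in the
+ * example above and a key that may be absent):
+ *
+ *   double z;
+ *   auto error = obj.find_field("z").get(z);
+ *   if (error == simdjson::NO_SUCH_FIELD) {
+ *     // the key "z" is not present in the object
+ *   }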
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
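+ *
+ * A hedged usage sketch (the variable `val` is assumed to be an ondemand value obtained
+ * elsewhere, e.g. from operator[]):
+ *
+ *   simdjson::ondemand::json_type t;
+ *   auto error = val.type().get(t);
+ *   if (!error && t == simdjson::ondemand::json_type::number) {
+ *     double d;
+ *     if (!val.get_double().get(d)) {
+ *       // use d
+ *     }
+ *   }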
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for icelake */ +/* including simdjson/generic/ondemand/logger.h for icelake: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator.h for icelake: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator.h for icelake: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for icelake */ +/* including simdjson/generic/ondemand/json_type.h for icelake: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. 
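+ *
+ * A short, illustrative sketch of inspecting an ondemand::number (the variable `val` is
+ * assumed to be an ondemand value positioned on a number):
+ *
+ *   simdjson::ondemand::number num;
+ *   if (!val.get_number().get(num)) {
+ *     if (num.is_double())      { double d = num.get_double(); }    // binary64
+ *     else if (num.is_int64())  { int64_t i = num.get_int64(); }    // signed integer
+ *     else                      { uint64_t u = num.get_uint64(); }  // unsigned integer
+ *   }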
+ */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
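+ *
+ * Illustrative only (exceptions enabled; `doc` is assumed to be an ondemand document or value):
+ *
+ *   auto t = doc.type();           // simdjson_result holding a json_type
+ *   std::cout << t << std::endl;   // may throw simdjson_error if t holds an error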
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string.h for icelake: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). 
+ * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. 
+ */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for icelake */ +/* including simdjson/generic/ondemand/parser.h for icelake: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation 
skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. 
+ * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. 
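+ *
+ * A usage sketch (not from the upstream simdjson documentation): most callers use the public
+ * iterate() overloads above rather than this private entry point. Assuming SIMDJSON_EXCEPTIONS
+ * is enabled, with hypothetical variable names:
+ *
+ * ```c++
+ * simdjson::ondemand::parser parser;
+ * auto json = R"({ "answer": 42 })"_padded;                 // padded buffer owned by the caller
+ * simdjson::ondemand::document doc = parser.iterate(json);  // doc borrows both json and parser
+ * int64_t answer = doc["answer"];                           // the value is parsed lazily on access
+ * ```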
+ */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsible for ensuring that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * Documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may negatively impact + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults to false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails.
You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. 
You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
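+ *
+ * A usage sketch (not from the upstream simdjson documentation): the destination buffer is
+ * owned by the caller; `parser`, `json` (the padded input) and `raw` (a raw_json_string) are
+ * hypothetical variables from the surrounding scope:
+ *
+ * ```c++
+ * std::unique_ptr<uint8_t[]> buffer(new uint8_t[json.size() + simdjson::SIMDJSON_PADDING]);
+ * uint8_t *dst = buffer.get();   // advanced past each string that gets unescaped into it
+ * std::string_view unescaped;
+ * auto error = parser.unescape_wobbly(raw, dst).get(unescaped);
+ * // on success, unescaped points into buffer and is valid only as long as buffer stays alive
+ * ```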
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for icelake */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for icelake: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Internal array creation. Call array::start() or array::started() instead of this constructor. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for icelake */ +/* including simdjson/generic/ondemand/array_iterator.h for icelake: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON array iterator. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator.
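+ *
+ * A usage sketch (not from the upstream simdjson documentation): this iterator is normally
+ * obtained implicitly through a range-based for loop rather than constructed directly; `doc`
+ * is a hypothetical ondemand::document and SIMDJSON_EXCEPTIONS is assumed to be enabled:
+ *
+ * ```c++
+ * for (auto element : doc.get_array()) {
+ *   int64_t x = element.get_int64();   // each element must be consumed exactly once
+ * }
+ * ```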
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for icelake */ +/* including simdjson/generic/ondemand/document.h for icelake: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. 
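+ *
+ * A usage sketch (not from the upstream simdjson documentation) of the declare-then-assign
+ * pattern with the error-code API; `parser` and `json` are hypothetical variables from the
+ * surrounding scope:
+ *
+ * ```c++
+ * simdjson::ondemand::document doc;            // invalid until assigned
+ * auto error = parser.iterate(json).get(doc);  // assign before first use
+ * if (error) { std::cerr << simdjson::error_message(error) << std::endl; }
+ * ```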
+ */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. 
\\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. 
+ * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded; + * auto doc = parser.iterate(json); + * double z = doc.find_field("z"); + * double y = doc.find_field("y"); + * double x = doc.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number is negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires partially parsing the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number is an integer. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type().
+ * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. 
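+ *
+ * A usage sketch (not from the upstream simdjson documentation): the returned location can be
+ * turned into a byte offset for diagnostics; `doc` and `json` (the padded input) are hypothetical
+ * variables, and the pointer is assumed to reference the original input buffer:
+ *
+ * ```c++
+ * const char *location;
+ * if (doc.current_location().get(location) == simdjson::SUCCESS) {
+ *   size_t offset = size_t(location - json.data());  // byte offset of the next token
+ * }
+ * ```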
+ */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
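+ *
+ * A usage sketch (not from the upstream simdjson documentation): document_reference instances
+ * typically come from iterating a document_stream produced by iterate_many(); `parser` is a
+ * hypothetical parser and SIMDJSON_EXCEPTIONS is assumed to be enabled:
+ *
+ * ```c++
+ * auto json = R"({ "foo": 1 } { "foo": 2 })"_padded;
+ * simdjson::ondemand::document_stream docs = parser.iterate_many(json);
+ * for (auto doc : docs) {
+ *   int64_t foo = doc["foo"];   // each doc behaves like a document borrowed from the stream
+ * }
+ * ```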
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(icelake::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace 
simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator icelake::ondemand::array() & noexcept(false); + simdjson_inline operator icelake::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator icelake::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator icelake::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for icelake */ +/* including simdjson/generic/ondemand/document_stream.h for icelake: #include 
"simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace icelake { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
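+ * For reference, the usual way a caller obtains and drains a stream looks like the
+ * sketch below (hedged: `json` and `batch_size` are illustrative placeholders for a
+ * padded input buffer and a window size, not members of this class):
+ *
+ *   simdjson::ondemand::parser parser;
+ *   simdjson::ondemand::document_stream stream;
+ *   auto error = parser.iterate_many(json, batch_size).get(stream);
+ *   if (error) { std::cerr << error << std::endl; return; }
+ *   for (auto doc : stream) {
+ *     int64_t id;
+ *     if (doc["id"].get(id)) { continue; }   // one bad document does not stop the stream
+ *   }
+ *   size_t leftover = stream.truncated_bytes();   // non-zero usually means a truncated tail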
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
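+ *
+ * None of this worker machinery is compiled in unless SIMDJSON_THREADS_ENABLED is
+ * defined (see the surrounding #ifdef). A hedged sketch of turning it on when consuming
+ * the single-header amalgamation directly:
+ *
+ *   #define SIMDJSON_THREADS_ENABLED 1   // or pass -DSIMDJSON_THREADS_ENABLED when compiling
+ *   #include "simdjson.h"
+ *   // ...and link against the platform thread library (e.g. -pthread).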
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for icelake */ +/* including simdjson/generic/ondemand/field.h for icelake: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
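+ *
+ * When iterating an object, each step yields a field; a short sketch of the usual
+ * access pattern (hedged: `obj` stands for an ondemand::object obtained elsewhere):
+ *
+ *   for (auto field : obj) {
+ *     std::string_view key;
+ *     if (field.unescaped_key().get(key)) { break; }   // consumes the key: call it only once
+ *     ondemand::value val;
+ *     if (field.value().get(val)) { break; }           // likewise, consume the value exactly once
+ *     // ... use key and val ...
+ *   }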
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for icelake */ +/* including simdjson/generic/ondemand/object.h for icelake: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
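+ *
+ * A short sketch of the usual pattern (hedged: `obj` stands for an ondemand::object and
+ * the field names are illustrative only):
+ *
+ *   double bid = 0, ask = 0;
+ *   auto error = obj["bid"].get(bid);              // operator[] forwards to find_field_unordered()
+ *   if (!error) { error = obj["ask"].get(ask); }   // look up the next key only after consuming the previous value
+ *   if (error == NO_SUCH_FIELD) { ask = 0; }       // absent keys are reported, not silently defaulted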
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator.h for icelake: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): 
#ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public icelake::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(icelake::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for icelake */ +/* including simdjson/generic/ondemand/serialization.h for icelake: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace icelake { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
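+ *
+ * A hedged sketch of the two serialization paths (`doc` stands for a freshly parsed
+ * ondemand::document; pick one, since either path consumes the document):
+ *
+ *   // Error-code style, no exceptions required:
+ *   std::string_view json_text;
+ *   if (!simdjson::to_json_string(doc).get(json_text)) { std::cout << json_text << '\n'; }
+ *
+ *   // Stream style (the simdjson_result overloads additionally require SIMDJSON_EXCEPTIONS):
+ *   // std::cout << doc << '\n';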
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::icelake::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for icelake */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for icelake: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
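+ // For callers this means the following pattern is safe (a hedged sketch; `arr` is an
+ // ondemand::array):
+ //   size_t n = 0;
+ //   if (!arr.count_elements().get(n)) {
+ //     for (auto element : arr) { }   // still begins at the first element
+ //   }
+ // The reset below is what makes that possible: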
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for icelake */ +/* including 
simdjson/generic/ondemand/array_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
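+ // The expected caller-side shape is roughly this (a hedged sketch; `arr` is an
+ // ondemand::array):
+ //   for (auto element : arr) {
+ //     double d;
+ //     if (element.get(d)) { break; }   // leave the loop on the first error
+ //   }
+ // The checks below only matter when a caller keeps iterating after an error: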
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::array_iterator &&value +) noexcept + : icelake::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : icelake::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/document-inl.h for icelake: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
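+ *
+ * A hedged illustration of the intended behavior (assumes `using namespace simdjson;`
+ * so that the _padded literal is in scope):
+ *
+ *   auto json = R"(1.233 blabla)"_padded;
+ *   ondemand::parser parser;
+ *   double d = 0;
+ *   auto error = parser.iterate(json).get_double().get(d);
+ *   // error is expected to be non-zero: the trailing 'blabla' is rejected rather than ignored.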
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if 
(error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator icelake::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for icelake */ +/* including simdjson/generic/ondemand/document_stream-inl.h for icelake: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
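+  // A sketch (illustrative only) of the intended handshake, mirroring how document_stream
+  // drives this worker further down in this file via start_stage1_thread() and
+  // load_from_stage1_thread():
+  //
+  //   worker->start_thread();                                      // create the thread once
+  //   worker->run(this, &stage1_thread_parser, next_batch_start()); // hand the next batch to the thread
+  //   ... iterate the documents of the current batch ...
+  //   worker->finish();                                            // block until has_work == false
+  //   std::swap(stage1_thread_parser, *parser);                    // adopt the freshly indexed batch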
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for icelake */ +/* including simdjson/generic/ondemand/field-inl.h for icelake: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if 
SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_type-inl.h for icelake: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool 
number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for icelake */ +/* including simdjson/generic/ondemand/logger-inl.h for icelake: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace icelake { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
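+// editor note (not part of upstream simdjson): the helpers below read the SIMDJSON_LOG_LEVEL
+// environment variable once (cached in log_threshold()) and gate every log line on it, in
+// addition to the compile-time LOG_ENABLED switch. A hedged usage sketch; the program name
+// is illustrative only:
+//
+//   SIMDJSON_LOG_LEVEL=ERROR ./my_program   # only error-level log lines are emitted
+//   ./my_program                            # default threshold is log_level::info
+//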
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for icelake */ +/* including simdjson/generic/ondemand/object-inl.h for icelake: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return 
find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for icelake */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ 
+/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/parser-inl.h for icelake: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for icelake */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for icelake: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace icelake { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for icelake */ +/* including simdjson/generic/ondemand/serialization-inl.h for icelake: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(icelake::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace icelake::ondemand; + icelake::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + icelake::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + icelake::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(icelake::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(icelake::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace icelake { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return 
(out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::icelake::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::icelake::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for icelake */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t 
token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for icelake */ +/* including simdjson/generic/ondemand/value-inl.h for icelake: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + 
return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & 
noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + icelake::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} 
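Editor's aside, not part of the vendored header: a minimal usage sketch of the ondemand value API defined above (count_elements(), get_array(), and the int64_t conversion), assuming the public simdjson interface with exceptions enabled, which is the default build. The function name and the "points" field below are hypothetical.

#include <cstdint>
#include <iostream>
#include "simdjson.h"

// Hypothetical example; relies only on the simdjson_result<value> methods defined above.
int64_t sum_points() {
  using namespace simdjson;
  ondemand::parser parser;
  auto json = R"({ "points": [1, 2, 3] })"_padded;
  auto doc = parser.iterate(json);
  auto points = doc["points"];
  // count_elements() reports the array size and then rewinds the iterator
  // back to '[', so the same value can still be iterated afterwards
  // (see the move_at_start() call above).
  size_t n = points.count_elements();
  int64_t total = 0;
  for (int64_t x : points.get_array()) { total += x; }
  std::cout << n << " elements, total " << total << "\n";
  return total;
}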
+simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() 
noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(icelake::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator icelake::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator icelake::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for icelake */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for icelake: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file 
simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. 
+ raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. 
+ // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
+ SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. 
+ } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
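  // Editor's note (illustrative, not upstream text): the 8 extra bytes cover the
  // worst-case non-digit characters of the largest textual double, namely '-',
  // '0', '.', 'e', the exponent sign and a three-digit exponent, so 1074+8 = 1082
  // matches the "more than 1082 characters" message below; the trailing +1 bytes
  // reserve room for the null terminator.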
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
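  // Editor's note (illustrative, not upstream text): UINT64_MAX is
  // 18446744073709551615, i.e. 20 digits, so a root token that does not fit in
  // the 20+1-byte copy limit cannot be a valid uint64 and is rejected below.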
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace icelake +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(icelake::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result<icelake::ondemand::value_iterator>::simdjson_result(error_code error) noexcept
+  : implementation_simdjson_result_base<icelake::ondemand::value_iterator>(error) {}
+
+} // namespace simdjson
+
+#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H
+/* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */
+/* end file simdjson/generic/ondemand/amalgamated.h for icelake */
+/* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */
+/* begin file simdjson/icelake/end.h */
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/icelake/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#if !SIMDJSON_CAN_ALWAYS_RUN_ICELAKE
+SIMDJSON_UNTARGET_REGION
+#endif
+
+/* undefining SIMDJSON_IMPLEMENTATION from "icelake" */
+#undef SIMDJSON_IMPLEMENTATION
+/* end file simdjson/icelake/end.h */
+
+#endif // SIMDJSON_ICELAKE_ONDEMAND_H
+/* end file simdjson/icelake/ondemand.h */
+#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(ppc64)
+/* including simdjson/ppc64/ondemand.h: #include "simdjson/ppc64/ondemand.h" */
+/* begin file simdjson/ppc64/ondemand.h */
+#ifndef SIMDJSON_PPC64_ONDEMAND_H
+#define SIMDJSON_PPC64_ONDEMAND_H
+
+/* including simdjson/ppc64/begin.h: #include "simdjson/ppc64/begin.h" */
+/* begin file simdjson/ppc64/begin.h */
+/* defining SIMDJSON_IMPLEMENTATION to "ppc64" */
+#define SIMDJSON_IMPLEMENTATION ppc64
+/* including simdjson/ppc64/base.h: #include "simdjson/ppc64/base.h" */
+/* begin file simdjson/ppc64/base.h */
+#ifndef SIMDJSON_PPC64_BASE_H
+#define SIMDJSON_PPC64_BASE_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+namespace simdjson {
+/**
+ * Implementation for ALTIVEC (PPC64).
+ */
+namespace ppc64 {
+
+class implementation;
+
+namespace {
+namespace simd {
+template <typename T> struct simd8;
+template <typename T> struct simd8x64;
+} // namespace simd
+} // unnamed namespace
+
+} // namespace ppc64
+} // namespace simdjson
+
+#endif // SIMDJSON_PPC64_BASE_H
+/* end file simdjson/ppc64/base.h */
+/* including simdjson/ppc64/intrinsics.h: #include "simdjson/ppc64/intrinsics.h" */
+/* begin file simdjson/ppc64/intrinsics.h */
+#ifndef SIMDJSON_PPC64_INTRINSICS_H
+#define SIMDJSON_PPC64_INTRINSICS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+// This should be the correct header whether
+// you use visual studio or other compilers.
+#include <altivec.h>
+
+// These are defined by altivec.h in GCC toolchain, it is safe to undef them.
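The #elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(...) chain above selects exactly one kernel at compile time (the icelake block ends here and the ppc64 block begins), but consumers of the amalgamated header only ever touch the portable simdjson::ondemand API; the chosen kernel stays an internal detail. A small caller-side sketch, assuming exceptions are enabled; the function name and use case are illustrative.

#include "simdjson.h"

// Count the top-level fields of a JSON object, whichever kernel was selected.
size_t count_top_level_fields(const simdjson::padded_string &json) {
  simdjson::ondemand::parser parser;
  simdjson::ondemand::document doc = parser.iterate(json); // throws simdjson_error on failure
  size_t n = 0;
  for (auto field : doc.get_object()) { (void)field; ++n; }
  return n;
}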
+#ifdef bool +#undef bool +#endif + +#ifdef vector +#undef vector +#endif + +static_assert(sizeof(__vector unsigned char) <= simdjson::SIMDJSON_PADDING, "insufficient padding for ppc64"); + +#endif // SIMDJSON_PPC64_INTRINSICS_H +/* end file simdjson/ppc64/intrinsics.h */ +/* including simdjson/ppc64/bitmanipulation.h: #include "simdjson/ppc64/bitmanipulation.h" */ +/* begin file simdjson/ppc64/bitmanipulation.h */ +#ifndef SIMDJSON_PPC64_BITMANIPULATION_H +#define SIMDJSON_PPC64_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num - 1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline int count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num); // Visual Studio wants two underscores +} +#else +simdjson_inline int count_ones(uint64_t input_num) { + return __builtin_popcountll(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + *result = value1 + value2; + return *result < value1; +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_BITMANIPULATION_H +/* end file simdjson/ppc64/bitmanipulation.h */ +/* including simdjson/ppc64/bitmask.h: #include "simdjson/ppc64/bitmask.h" */ +/* begin file simdjson/ppc64/bitmask.h */ +#ifndef SIMDJSON_PPC64_BITMASK_H +#define SIMDJSON_PPC64_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is +// encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(uint64_t bitmask) { + // You can use the version below, however gcc sometimes miscompiles + // vec_pmsum_be, it happens somewhere around between 8 and 9th version. + // The performance boost was not noticeable, falling back to a usual + // implementation. + // __vector unsigned long long all_ones = {~0ull, ~0ull}; + // __vector unsigned long long mask = {bitmask, 0}; + // // Clang and GCC return different values for pmsum for ull so cast it to one. + // // Generally it is not specified by ALTIVEC ISA what is returned by + // // vec_pmsum_be. 
+  // #if defined(__LITTLE_ENDIAN__)
+  // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]);
+  // #else
+  // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]);
+  // #endif
+  bitmask ^= bitmask << 1;
+  bitmask ^= bitmask << 2;
+  bitmask ^= bitmask << 4;
+  bitmask ^= bitmask << 8;
+  bitmask ^= bitmask << 16;
+  bitmask ^= bitmask << 32;
+  return bitmask;
+}
+
+} // unnamed namespace
+} // namespace ppc64
+} // namespace simdjson
+
+#endif
+/* end file simdjson/ppc64/bitmask.h */
+/* including simdjson/ppc64/numberparsing_defs.h: #include "simdjson/ppc64/numberparsing_defs.h" */
+/* begin file simdjson/ppc64/numberparsing_defs.h */
+#ifndef SIMDJSON_PPC64_NUMBERPARSING_DEFS_H
+#define SIMDJSON_PPC64_NUMBERPARSING_DEFS_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/intrinsics.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include <cstring>
+
+#if defined(__linux__)
+#include <byteswap.h>
+#elif defined(__FreeBSD__)
+#include <sys/endian.h>
+#endif
+
+namespace simdjson {
+namespace ppc64 {
+namespace numberparsing {
+
+// we don't have appropriate instructions, so let us use a scalar function
+// credit: https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
+/** @private */
+static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) {
+  uint64_t val;
+  std::memcpy(&val, chars, sizeof(uint64_t));
+#ifdef __BIG_ENDIAN__
+#if defined(__linux__)
+  val = bswap_64(val);
+#elif defined(__FreeBSD__)
+  val = bswap64(val);
+#endif
+#endif
+  val = (val & 0x0F0F0F0F0F0F0F0F) * 2561 >> 8;
+  val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
+  return uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
+}
+
+/** @private */
+simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) {
+  internal::value128 answer;
+#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+#ifdef _M_ARM64
+  // ARM64 has native support for 64-bit multiplications, no need to emultate
+  answer.high = __umulh(value1, value2);
+  answer.low = value1 * value2;
+#else
+  answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
+#endif // _M_ARM64
+#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
+  __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
+  answer.low = uint64_t(r);
+  answer.high = uint64_t(r >> 64);
+#endif
+  return answer;
+}
+
+} // namespace numberparsing
+} // namespace ppc64
+} // namespace simdjson
+
+#define SIMDJSON_SWAR_NUMBER_PARSING 1
+
+#endif // SIMDJSON_PPC64_NUMBERPARSING_DEFS_H
+/* end file simdjson/ppc64/numberparsing_defs.h */
+/* including simdjson/ppc64/simd.h: #include "simdjson/ppc64/simd.h" */
+/* begin file simdjson/ppc64/simd.h */
+#ifndef SIMDJSON_PPC64_SIMD_H
+#define SIMDJSON_PPC64_SIMD_H
+
+/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */
+/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */
+/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */
+
+#include 
+
+namespace simdjson {
+namespace 
ppc64 { +namespace { +namespace simd { + +using __m128i = __vector unsigned char; + +template struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i &() const { + return this->value; + } + simdjson_inline operator __m128i &() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { + return vec_or(this->value, (__m128i)other); + } + simdjson_inline Child operator&(const Child other) const { + return vec_and(this->value, (__m128i)other); + } + simdjson_inline Child operator^(const Child other) const { + return vec_xor(this->value, (__m128i)other); + } + simdjson_inline Child bit_andnot(const Child other) const { + return vec_andc(this->value, (__m128i)other); + } + simdjson_inline Child &operator|=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast | other; + return *this_cast; + } + simdjson_inline Child &operator&=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast & other; + return *this_cast; + } + simdjson_inline Child &operator^=(const Child other) { + auto this_cast = static_cast(this); + *this_cast = *this_cast ^ other; + return *this_cast; + } +}; + +template > +struct base8 : base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { + return (__m128i)vec_cmpeq(lhs.value, (__m128i)rhs); + } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(simd8 prev_chunk) const { + __m128i chunk = this->value; +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve(this->value); + prev_chunk = (__m128i)vec_reve((__m128i)prev_chunk); +#endif + chunk = (__m128i)vec_sld((__m128i)prev_chunk, (__m128i)chunk, 16 - N); +#ifdef __LITTLE_ENDIAN__ + chunk = (__m128i)vec_reve((__m128i)chunk); +#endif + return chunk; + } +}; + +// SIMD byte mask type (returned by things like eq and gt) +template <> struct simd8 : base8 { + static simdjson_inline simd8 splat(bool _value) { + return (__m128i)vec_splats((unsigned char)(-(!!_value))); + } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) + : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) + : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { + __vector unsigned long long result; + const __m128i perm_mask = {0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00}; + + result = ((__vector unsigned long long)vec_vbpermq((__m128i)this->value, + (__m128i)perm_mask)); +#ifdef __LITTLE_ENDIAN__ + return static_cast(result[1]); +#else + return static_cast(result[0]); +#endif + } + simdjson_inline bool any() const { + return !vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline simd8 operator~() const { + return this->value ^ (__m128i)splat(true); + } +}; + +template struct base8_numeric : base8 { + static simdjson_inline simd8 splat(T value) { + (void)value; + return (__m128i)vec_splats(value); + } + static simdjson_inline simd8 zero() { return splat(0); } + static simdjson_inline simd8 load(const T values[16]) { + return (__m128i)(vec_vsx_ld(0, 
reinterpret_cast(values))); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16(T v0, T v1, T v2, T v3, T v4, + T v5, T v6, T v7, T v8, T v9, + T v10, T v11, T v12, T v13, + T v14, T v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, + v14, v15); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) + : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { + vec_vsx_st(this->value, 0, reinterpret_cast<__m128i *>(dst)); + } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { + return (__m128i)((__m128i)this->value + (__m128i)other); + } + simdjson_inline simd8 operator-(const simd8 other) const { + return (__m128i)((__m128i)this->value - (__m128i)other); + } + simdjson_inline simd8 &operator+=(const simd8 other) { + *this = *this + other; + return *static_cast *>(this); + } + simdjson_inline simd8 &operator-=(const simd8 other) { + *this = *this - other; + return *static_cast *>(this); + } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior + // for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return (__m128i)vec_perm((__m128i)lookup_table, (__m128i)lookup_table, this->value); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted + // as a bitset). Passing a 0 value for mask would be equivalent to writing out + // every byte to output. Only the first 16 - count_ones(mask) bytes of the + // result are significant but 16 bytes get written. Design consideration: it + // seems like a function with the signature simd8 compress(uint32_t mask) + // would be sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L *output) const { + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + using internal::thintable_epi8; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. +#ifdef __LITTLE_ENDIAN__ + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask1], thintable_epi8[mask2]}; +#else + __m128i shufmask = (__m128i)(__vector unsigned long long){ + thintable_epi8[mask2], thintable_epi8[mask1]}; + shufmask = (__m128i)vec_reve((__m128i)shufmask); +#endif + // we increment by 0x08 the second half of the mask + shufmask = ((__m128i)shufmask) + + ((__m128i)(__vector int){0, 0, 0x08080808, 0x08080808}); + + // this is the version "nearly pruned" + __m128i pruned = vec_perm(this->value, this->value, shufmask); + // we still need to put the two halves together. 
+ // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + vec_vsx_ld(0, reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = vec_perm(pruned, (__m128i)vec_splats(0), compactmask); + vec_vsx_st(answer, 0, reinterpret_cast<__m128i *>(output)); + } + + template + simdjson_inline simd8 + lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4, + L replace5, L replace6, L replace7, L replace8, L replace9, + L replace10, L replace11, L replace12, L replace13, L replace14, + L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, replace4, replace5, replace6, + replace7, replace8, replace9, replace10, replace11, replace12, + replace13, replace14, replace15)); + } +}; + +// Signed bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, + int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) + : simd8((__m128i)(__vector signed char){v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10, v11, v12, v13, v14, + v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, + int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11, + int8_t v12, int8_t v13, int8_t v14, int8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Order-sensitive comparisons + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return (__m128i)vec_cmpgt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return (__m128i)vec_cmplt((__vector signed char)this->value, + (__vector signed char)(__m128i)other); + } +}; + +// Unsigned bytes +template <> struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) + : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t *values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline + simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, + uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10, + uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15) + : simd8((__m128i){v0, v1, v2, v3, v4, v5, v6, 
v7, v8, v9, v10, v11, v12, + v13, v14, v15}) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 + repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, + uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, + uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, + uint8_t v15) { + return simd8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, + v13, v14, v15); + } + + // Saturated math + simdjson_inline simd8 + saturating_add(const simd8 other) const { + return (__m128i)vec_adds(this->value, (__m128i)other); + } + simdjson_inline simd8 + saturating_sub(const simd8 other) const { + return (__m128i)vec_subs(this->value, (__m128i)other); + } + + // Order-specific operations + simdjson_inline simd8 + max_val(const simd8 other) const { + return (__m128i)vec_max(this->value, (__m128i)other); + } + simdjson_inline simd8 + min_val(const simd8 other) const { + return (__m128i)vec_min(this->value, (__m128i)other); + } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + gt_bits(const simd8 other) const { + return this->saturating_sub(other); + } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 + lt_bits(const simd8 other) const { + return other.saturating_sub(*this); + } + simdjson_inline simd8 + operator<=(const simd8 other) const { + return other.max_val(*this) == other; + } + simdjson_inline simd8 + operator>=(const simd8 other) const { + return other.min_val(*this) == other; + } + simdjson_inline simd8 + operator>(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + simdjson_inline simd8 + operator<(const simd8 other) const { + return this->gt_bits(other).any_bits_set(); + } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { + return (__m128i)vec_cmpeq(this->value, (__m128i)vec_splats(uint8_t(0))); + } + simdjson_inline simd8 bits_not_set(simd8 bits) const { + return (*this & bits).bits_not_set(); + } + simdjson_inline simd8 any_bits_set() const { + return ~this->bits_not_set(); + } + simdjson_inline simd8 any_bits_set(simd8 bits) const { + return ~this->bits_not_set(bits); + } + simdjson_inline bool bits_not_set_anywhere() const { + return vec_all_eq(this->value, (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere() const { + return !bits_not_set_anywhere(); + } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { + return vec_all_eq(vec_and(this->value, (__m128i)bits), + (__m128i)vec_splats(0)); + } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { + return !bits_not_set_anywhere(bits); + } + template simdjson_inline simd8 shr() const { + return simd8( + (__m128i)vec_sr(this->value, (__m128i)vec_splat_u8(N))); + } + template simdjson_inline simd8 shl() const { + return simd8( + (__m128i)vec_sl(this->value, (__m128i)vec_splat_u8(N))); + } +}; + +template struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, + "PPC64 kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64 &o) = delete; // no copy allowed + simd8x64 & + operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, + const simd8 chunk2, const simd8 chunk3) + : chunks{chunk0, chunk1, chunk2, 
chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) + : chunks{simd8::load(ptr), simd8::load(ptr + 16), + simd8::load(ptr + 32), simd8::load(ptr + 48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr + sizeof(simd8) * 0); + this->chunks[1].store(ptr + sizeof(simd8) * 1); + this->chunks[2].store(ptr + sizeof(simd8) * 2); + this->chunks[3].store(ptr + sizeof(simd8) * 3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | + (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T *output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), + output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), + output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), + output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask()); + uint64_t r1 = this->chunks[1].to_bitmask(); + uint64_t r2 = this->chunks[2].to_bitmask(); + uint64_t r3 = this->chunks[3].to_bitmask(); + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] == mask, this->chunks[1] == mask, + this->chunks[2] == mask, this->chunks[3] == mask) + .to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64(this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3]) + .to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64(this->chunks[0] <= mask, this->chunks[1] <= mask, + this->chunks[2] <= mask, this->chunks[3] <= mask) + .to_bitmask(); + } +}; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_SIMD_INPUT_H +/* end file simdjson/ppc64/simd.h */ +/* including simdjson/ppc64/stringparsing_defs.h: #include "simdjson/ppc64/stringparsing_defs.h" */ +/* begin file simdjson/ppc64/stringparsing_defs.h */ +#ifndef SIMDJSON_PPC64_STRINGPARSING_DEFS_H +#define SIMDJSON_PPC64_STRINGPARSING_DEFS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/simd.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
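simd8x64 above packs a comparison over a whole 64-byte block into a single 64-bit mask, which is exactly how the backslash_and_quote finder just below consumes it. A minimal sketch, assuming it sits in this same unnamed namespace so simd8x64 and prefix_xor (from bitmask.h above) are visible; the function name is illustrative and escaped quotes are ignored for simplicity.

simdjson_inline uint64_t in_string_mask_sketch(const uint8_t block[64]) {
  simd::simd8x64<uint8_t> in(block);  // four 16-byte vector loads
  uint64_t quote_bits = in.eq('"');   // bit i is set when block[i] == '"'
  return prefix_xor(quote_bits);      // bits toggle at each quote: 1 = inside a string
}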
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote + copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { + return ((bs_bits - 1) & quote_bits) != 0; + } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { + return trailing_zeroes(quote_bits); + } + simdjson_inline int backslash_index() { + return trailing_zeroes(bs_bits); + } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote +backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), + "backslash and quote finder must process fewer than " + "SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + sizeof(v0)); + v0.store(dst); + v1.store(dst + sizeof(v0)); + + // Getting a 64-bit bitmask is much cheaper than multiple 16-bit bitmasks on + // PPC; therefore, we smash them together into a 64-byte mask and get the + // bitmask from there. + uint64_t bs_and_quote = + simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_PPC64_STRINGPARSING_DEFS_H +/* end file simdjson/ppc64/stringparsing_defs.h */ + +#define SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT 1 +/* end file simdjson/ppc64/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for ppc64: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for ppc64: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::ppc64::number_type */ +using number_type = simdjson::ppc64::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. 
These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. 
This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). 
+ * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + 
simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/value.h for ppc64: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
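+ *
+ * As an illustrative sketch (assuming exceptions are enabled and
+ * `using namespace simdjson;`), the bracket operator declared below performs
+ * this unordered lookup, so out-of-order access still succeeds:
+ *
+ * ```c++
+ * ondemand::parser parser;
+ * auto doc = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
+ * double z = doc["z"];  // found by scanning forward
+ * double x = doc["x"];  // found by wrapping back to the start of the object
+ * ```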
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator ppc64::ondemand::array() noexcept(false); + simdjson_inline operator ppc64::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
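+ *
+ * A hedged dispatch sketch (assumes exceptions are enabled; `val` stands for a
+ * previously obtained ondemand::value, not a name defined in this header):
+ *
+ * ```c++
+ * ondemand::json_type t = val.type();          // does not consume the value
+ * if (t == ondemand::json_type::number) {
+ *   double d = val.get_double();               // consume as a number
+ * } else if (t == ondemand::json_type::string) {
+ *   std::string_view s = val.get_string();     // consume as a string
+ * }
+ * ```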
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for ppc64 */ +/* including simdjson/generic/ondemand/logger.h for ppc64: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator.h for ppc64: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_iterator.h for ppc64: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type.h for ppc64: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. + */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. 
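+ *
+ * A small illustrative sketch (assuming exceptions are enabled; `doc` is a
+ * previously iterated document and "n" is an illustrative field name):
+ *
+ * ```c++
+ * ondemand::number num = doc["n"].get_number();
+ * double d = num.is_double() ? num.get_double()  // already a binary64
+ *                            : num.as_double();  // possibly lossy conversion
+ * ```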
+ */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
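+ *
+ * For instance (a hedged sketch, assuming exceptions are enabled; `doc` and
+ * "n" are illustrative):
+ *
+ * ```c++
+ * auto t = doc["n"].type();       // simdjson_result of the json_type
+ * std::cout << t << std::endl;    // streams a textual name for the type
+ * ```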
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). + * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. 
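+ *
+ * A brief illustrative sketch (assuming exceptions are enabled; the comparison
+ * operators declared further below compare byte-by-byte, with no unescaping):
+ *
+ * ```c++
+ * ondemand::raw_json_string key = doc["name"].get_raw_json_string();
+ * bool matched = (key == "simdjson");  // byte-wise match on the raw text
+ * ```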
+ */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. + */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. 
+ * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for ppc64 */ +/* including simdjson/generic/ondemand/parser.h for ppc64: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace 
simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. 
+ * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. 
+ *
+ *   auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded;
+ *   ondemand::parser parser;
+ *   ondemand::document_stream docs = parser.iterate_many(json);
+ *   for (auto & doc : docs) {
+ *     std::cout << doc["foo"] << std::endl;
+ *   }
+ *   // Prints 1 2 3
+ *
+ * No copy of the input buffer is made.
+ *
+ * The function is lazy: it may be that no more than one JSON document at a time is parsed.
+ *
+ * The caller is responsible for ensuring that the input string data remains unchanged and is
+ * not deleted during the loop.
+ *
+ * ### Format
+ *
+ * The buffer must contain a series of one or more JSON documents, concatenated into a single
+ * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document,
+ * then starts parsing the next document at that point. (It does this with more parallelism and
+ * lookahead than you might think, though.)
+ *
+ * Documents that consist of an object or array may omit the whitespace between them, concatenating
+ * with no separator. Documents that consist of a single primitive (i.e. documents that are not
+ * arrays or objects) MUST be separated with ASCII whitespace.
+ *
+ * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8).
+ *
+ * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse.
+ * Setting batch_size to an excessively large or excessively small value may negatively impact
+ * performance.
+ *
+ * ### REQUIRED: Buffer Padding
+ *
+ * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
+ * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you
+ * are using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the
+ * SIMDJSON_PADDING bytes to avoid runtime warnings.
+ *
+ * ### Threads
+ *
+ * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
+ * hood to do some lookahead.
+ *
+ * ### Parser Capacity
+ *
+ * If the parser's current capacity is less than batch_size, it will allocate enough capacity
+ * to handle it (up to max_capacity).
+ *
+ * @param buf The concatenated JSON to parse.
+ * @param len The length of the concatenated JSON.
+ * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
+ *                   spot is cache-related: small enough to fit in cache, yet big enough to
+ *                   parse as many documents as possible in one tight loop.
+ *                   Defaults to 1MB (DEFAULT_BATCH_SIZE), which has been a reasonable sweet spot in our tests.
+ * @param allow_comma_separated (defaults to false) This allows a mode where the documents are
+ *                   separated by commas instead of whitespace. It comes with a performance
+ *                   penalty because the entire document is indexed at once (and the document must be
+ *                   less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter
+ *                   is effectively ignored, as it is set to at least the document size.
+ * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors:
+ *         - MEMALLOC if the parser does not have enough capacity and memory allocation fails
+ *         - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
+ *         - other json errors if parsing fails. You should not rely on these errors to always be the same for the
+ *           same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware).
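+ *
+ * The following is an illustrative sketch only (it is not part of the upstream documentation):
+ * it shows a non-throwing way to consume the stream, where `stream`, `k` and `my_batch_size`
+ * are hypothetical names chosen for the example, and `my_batch_size` must exceed the size of
+ * the largest document in the buffer.
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"({ "k": 1 } { "k": 2 })"_padded;
+ *   size_t my_batch_size = 1000000; // 1MB, same as DEFAULT_BATCH_SIZE
+ *   ondemand::document_stream stream;
+ *   auto error = parser.iterate_many(json, my_batch_size).get(stream);
+ *   if (!error) {
+ *     for (auto doc : stream) {
+ *       int64_t k;
+ *       if (doc["k"].get(k) == SUCCESS) {
+ *         // use k
+ *       }
+ *     }
+ *   }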
+ */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. 
If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
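+ *
+ * Illustrative sketch (not part of the upstream documentation; `buffer`, `p` and `rjs` are
+ * hypothetical names, and it is assumed that `json` is the padded input currently being parsed):
+ * unescaping a raw string into a caller-owned buffer. The same buffer can be reused for further
+ * strings because `p` is advanced past the bytes written.
+ *
+ *   std::unique_ptr<uint8_t[]> buffer(new uint8_t[json.size() + SIMDJSON_PADDING]);
+ *   uint8_t *p = buffer.get();
+ *   ondemand::raw_json_string rjs;
+ *   if (doc["name"].get_raw_json_string().get(rjs) == SUCCESS) {
+ *     std::string_view unescaped;
+ *     if (parser.unescape_wobbly(rjs, p).get(unescaped) == SUCCESS) {
+ *       // unescaped points into buffer; p now points just past the written bytes
+ *     }
+ *   }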
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for ppc64 */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for ppc64: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for ppc64 */ +/* including simdjson/generic/ondemand/array_iterator.h for ppc64: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. 
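+ *
+ * In typical code this iterator is not constructed directly; it is obtained implicitly through
+ * range-based iteration over an ondemand::array. Illustrative sketch (assuming exceptions are
+ * enabled and `doc` holds a document whose root is an array of numbers):
+ *
+ *   for (auto element : doc.get_array()) {
+ *     double d = element.get_double();
+ *   }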
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/document.h for ppc64: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. 
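+ *
+ * Illustrative sketch (not from the upstream documentation; assumes a non-throwing style and
+ * that the parsed document is a JSON array; `arr` is an example name):
+ *
+ *   ondemand::array arr;
+ *   if (doc.get_array().get(arr) == SUCCESS) {
+ *     for (auto element : arr) {
+ *       // consume each element exactly once
+ *     }
+ *   }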
+ * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. 
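+ *
+ * Minimal illustrative sketch (not from the upstream documentation; assumes the whole document
+ * is the literal `true` or `false`; `flag` is an example name):
+ *
+ *   bool flag;
+ *   if (doc.get_bool().get(flag) == SUCCESS) {
+ *     // flag now holds the parsed Boolean
+ *   }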
+ */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. 
+ * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. 
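+ *
+ * Illustrative sketch (not from the upstream documentation): a non-throwing variant of the
+ * ordered example shown for find_field() above, with `parser`, `doc`, `x` and `z` as example
+ * names. The unordered lookup tolerates keys being requested in any order, but each key should
+ * still be consumed only once.
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"( { "x": 1, "y": 2, "z": 3 } )"_padded;
+ *   auto doc = parser.iterate(json);
+ *   double z, x;
+ *   auto error = doc["z"].get(z);             // finds "z" even though it is the last field
+ *   if (!error) { error = doc["x"].get(x); }  // wraps around to the beginning to find "x"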
+ *
+ * @param key The key to look up.
+ * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object.
+ */
+ simdjson_inline simdjson_result<value> find_field_unordered(std::string_view key) & noexcept;
+ /** @overload simdjson_inline simdjson_result<value> find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result<value> find_field_unordered(const char *key) & noexcept;
+ /** @overload simdjson_inline simdjson_result<value> find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result<value> operator[](std::string_view key) & noexcept;
+ /** @overload simdjson_inline simdjson_result<value> find_field_unordered(std::string_view key) & noexcept; */
+ simdjson_inline simdjson_result<value> operator[](const char *key) & noexcept;
+
+ /**
+ * Get the type of this JSON value. It does not validate or consume the value.
+ * E.g., you must still call "is_null()" to check that a value is null even if
+ * "type()" returns json_type::null.
+ *
+ * NOTE: If you're only expecting a value to be one type (a typical case), it's generally
+ * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just
+ * let it throw an exception).
+ *
+ * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+ */
+ simdjson_inline simdjson_result<json_type> type() noexcept;
+
+ /**
+ * Checks whether the document is a scalar (string, number, null, Boolean).
+ * Returns false when it is an array or object.
+ *
+ * @returns true if the type is string, number, null, Boolean
+ * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse".
+ */
+ simdjson_inline simdjson_result<bool> is_scalar() noexcept;
+
+ /**
+ * Checks whether the document is a negative number.
+ *
+ * @returns true if the number is negative.
+ */
+ simdjson_inline bool is_negative() noexcept;
+ /**
+ * Checks whether the document is an integer number. Note that
+ * this requires partially parsing the number string. If
+ * the value is determined to be an integer, it may still
+ * not parse properly as an integer in subsequent steps
+ * (e.g., it might overflow).
+ *
+ * @returns true if the number is an integer.
+ */
+ simdjson_inline simdjson_result<bool> is_integer() noexcept;
+ /**
+ * Determine the number type (integer or floating-point number) as quickly
+ * as possible. This function does not fully validate the input. It is
+ * useful when you only need to classify the numbers, without parsing them.
+ *
+ * If you are planning to retrieve the value or you need full validation,
+ * consider using the get_number() method instead: it will fully parse
+ * and validate the input, and give you access to the type:
+ * get_number().get_number_type().
+ *
+ * get_number_type() is number_type::unsigned_integer if we have
+ * an integer greater than or equal to 9223372036854775808.
+ * get_number_type() is number_type::signed_integer if we have an
+ * integer that is less than 9223372036854775808.
+ * Otherwise, get_number_type() has value number_type::floating_point_number.
+ *
+ * This function requires processing the number string, but it is expected
+ * to be faster than get_number().get_number_type() because it does not
+ * parse the number value.
+ *
+ * @returns the type of the number
+ */
+ simdjson_inline simdjson_result<number_type> get_number_type() noexcept;
+
+ /**
+ * Attempt to parse an ondemand::number. An ondemand::number may
+ * contain an integer value or a floating-point value, the simdjson
+ * library will autodetect the type.
Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. 
+ * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
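+ *
+ * Instances are typically obtained by iterating a document_stream rather than constructed
+ * directly. Illustrative sketch (assuming exceptions are enabled, mirroring the iterate_many
+ * example above; `parser`, `json` and `docs` are example names):
+ *
+ *   ondemand::document_stream docs = parser.iterate_many(json);
+ *   for (auto doc : docs) {
+ *     // each doc behaves like a document and refers to the stream's current document
+ *     std::cout << doc["foo"] << std::endl;
+ *   }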
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(ppc64::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + +namespace simdjson { 
+ +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator ppc64::ondemand::array() & noexcept(false); + simdjson_inline operator ppc64::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator ppc64::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator ppc64::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream.h for ppc64: #include "simdjson/generic/ondemand/document_stream.h" */ +/* 
begin file simdjson/generic/ondemand/document_stream.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. 
You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for ppc64 */ +/* including simdjson/generic/ondemand/field.h for ppc64: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
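+   *
+   * Brief sketch (reviewer-added; not from the upstream header), assuming `obj` is an
+   * ondemand::object being iterated; names are illustrative only:
+   *
+   *   for (auto field : obj) {
+   *     std::string_view key;
+   *     auto error = field.unescaped_key().get(key);
+   *     if (error) { break; }
+   *     ondemand::value val;
+   *     error = field.value().get(val);
+   *     if (error) { break; }
+   *     // consume `val` before advancing to the next field
+   *   }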
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for ppc64 */ +/* including simdjson/generic/ondemand/object.h for ppc64: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
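+   *
+   * A short example (reviewer-added; not part of the upstream header) mirroring the
+   * find_field() example above; it assumes SIMDJSON_EXCEPTIONS is enabled, and the JSON
+   * literal and variable names are illustrative only:
+   *
+   * ```c++
+   * simdjson::ondemand::parser parser;
+   * auto json = R"( { "x": 1, "y": 2 } )"_padded;
+   * auto doc = parser.iterate(json);
+   * auto obj = doc.get_object();
+   * double y = obj["y"]; // out-of-order lookup is fine here
+   * double x = obj["x"];
+   * ```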
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator.h for ppc64: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef 
SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public ppc64::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(ppc64::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization.h for ppc64: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace ppc64 { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
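+ *
+ * Hedged usage sketch (reviewer-added; not part of the upstream header); the JSON literal
+ * and variable names are illustrative only:
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"([1, 2, 3])"_padded;
+ *   auto doc = parser.iterate(json);
+ *   ondemand::array arr;
+ *   if (!doc.get_array().get(arr)) {
+ *     std::cout << arr << '\n'; // writes the array's JSON text, without validating it
+ *   }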
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::ppc64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for ppc64 */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for ppc64: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for ppc64 */ +/* including 
simdjson/generic/ondemand/array_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::array_iterator &&value +) noexcept + : ppc64::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : ppc64::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document-inl.h for ppc64: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + return 
iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
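+ *
+ * For example (reviewer-added sketch, not from the upstream source), the trailing text
+ * below makes the root getter fail rather than silently return 1.233:
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"(1.233 blabla)"_padded;
+ *   auto doc = parser.iterate(json);
+ *   double d;
+ *   auto error = doc.get_double().get(d); // error != SUCCESS because of the trailing content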
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if (error()) { 
return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator ppc64::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/document_stream-inl.h for ppc64: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
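+  // (Editor's note, descriptive only: finish() and the worker thread coordinate
+  //  through locking_mutex and cond_var. run() sets has_work = true and notifies
+  //  the worker; the worker executes stage 1, clears has_work and notifies again,
+  //  which is the condition the wait below unblocks on.)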
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
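+  // (Editor's note -- illustrative usage sketch, not part of upstream simdjson;
+  //  the exact error-handling idiom can differ between simdjson versions:
+  //    ondemand::parser parser;
+  //    ondemand::document_stream stream;
+  //    auto error = parser.iterate_many(padded_json).get(stream);
+  //    if (!error) {
+  //      for (auto doc : stream) { /* doc behaves like a document_reference result */ }
+  //    }
+  //  The range-for above calls this begin(); when the input holds no documents the
+  //  returned iterator already compares equal to end().)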
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
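+      // (Editor's note, descriptive only: reaching this point means stage 1
+      //  succeeded on the new window and doc.iter has been re-anchored to
+      //  &buf[batch_start]; the surrounding while loop then exits because the
+      //  error is no longer EMPTY.)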
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
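+  // (Editor's note, descriptive only: this is a simple double-buffering scheme.
+  //  The batch the worker just finished was swapped into *parser above for the
+  //  caller to consume, and the call below sends the worker off to run stage 1
+  //  for the following batch into stage1_thread_parser; the two parsers will be
+  //  swapped again on the next call to load_from_stage1_thread().)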
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/field-inl.h for ppc64: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); +#if 
SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
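+      // (Editor's note, illustrative only: for a scalar such as 42 or "abc" the
+      //  caller has already advanced depth to parent_depth + 1, so the decrement
+      //  below restores depth to parent_depth and the following check ends the
+      //  skip immediately, without entering the token loop further down.)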
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_type-inl.h for ppc64: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool number::is_int64() const 
noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/logger-inl.h for ppc64: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace ppc64 { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
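+// Illustrative note (not part of the library sources): the ondemand::number accessors above carry
+// the parsed value together with its number_type, so a caller can branch without re-parsing.
+// A minimal sketch, assuming `val` is an ondemand::value holding a JSON number and use() is a
+// placeholder for the caller's own handling:
+//
+//   simdjson::ondemand::number num;
+//   if (val.get_number().get(num) == simdjson::SUCCESS) {
+//     switch (num.get_number_type()) {
+//       case simdjson::ondemand::number_type::signed_integer:   use(num.get_int64());  break;
+//       case simdjson::ondemand::number_type::unsigned_integer: use(num.get_uint64()); break;
+//       default:                                                 use(num.get_double()); break;
+//     }
+//   }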
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
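+  // Note (illustrative, not part of the library sources): per get_log_level_from_env() above,
+  // when the library is compiled with logging enabled (LOG_ENABLED), the SIMDJSON_LOG_LEVEL
+  // environment variable selects the threshold; setting it to "ERROR" restricts output to
+  // error lines, e.g.:
+  //
+  //   SIMDJSON_LOG_LEVEL=ERROR ./my_program
+  //
+  // Any other value, or an unset variable, keeps the default log_level::info threshold.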
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object-inl.h for ppc64: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return find_field_unordered(key); 
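+  // Usage sketch (illustrative only, not part of the library sources): operator[] forwards to
+  // find_field_unordered(), which matches keys in any order, while find_field() expects keys in
+  // document order. Assuming `obj` is an ondemand::object over {"make":"Toyota","model":"Corolla"}:
+  //
+  //   std::string_view model;
+  //   auto error = obj["model"].get_string().get(model);   // unordered lookup
+  //   if (!error) { /* model == "Corolla" */ }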
+} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
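+  // Note (illustrative, not part of the library sources): object::at_pointer() above applies the
+  // JSON Pointer escape rules, so "~0" in a reference token stands for '~' and "~1" stands for '/'.
+  // Assuming `obj` is an ondemand::object over {"a/b": {"c~d": 42}}:
+  //
+  //   int64_t x;
+  //   auto error = obj.at_pointer("/a~1b/c~0d").get_int64().get(x);   // x == 42 on success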
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/parser-inl.h for ppc64: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline parser::parser(size_t 
max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
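+  // Usage sketch (illustrative only, not part of the library sources): iterate() and
+  // iterate_raw() both require the input to carry at least SIMDJSON_PADDING writable bytes past
+  // the end, which padded_string provides; unpadded inputs are rejected with INSUFFICIENT_PADDING:
+  //
+  //   using namespace simdjson;
+  //   ondemand::parser parser;
+  //   padded_string json = R"({"key":123})"_padded;   // allocates the required padding
+  //   auto doc = parser.iterate(json);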
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for ppc64: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped 
(editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace ppc64 { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
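+  // Note (illustrative, not part of the library sources): these comparisons match the raw, still
+  // escaped key bytes against the target, which is why the target must not contain unescaped
+  // quote characters. A typical use is cheap key filtering while iterating an object, assuming
+  // `field` is an ondemand::field:
+  //
+  //   if (field.key() == "model") { /* handle the field without unescaping the key */ }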
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/serialization-inl.h for ppc64: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(ppc64::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace ppc64::ondemand; + ppc64::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + ppc64::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + ppc64::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(ppc64::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(ppc64::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace ppc64 { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + 
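+  // Usage sketch (illustrative only, not part of the library sources): to_json_string() returns
+  // the trimmed raw JSON text of a document, value, array or object, and the operator<< overloads
+  // below build on it, so a value can be echoed without materializing it:
+  //
+  //   std::string_view raw;
+  //   if (simdjson::to_json_string(doc).get(raw) == simdjson::SUCCESS) { std::cout << raw; }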
throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::ppc64::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::ppc64::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t token_iterator::peek_length(token_position position) const 
noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value-inl.h for ppc64: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for ppc64 */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) noexcept { + return iter; +} +simdjson_inline value value::resume(const 
value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + 
answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + ppc64::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + 
if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template 
simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(ppc64::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator ppc64::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator ppc64::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for ppc64: #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +#ifndef 
SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. 
It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). 
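+    // Illustrative note (not from upstream simdjson): this ordered scan is what
+    // object::find_field() builds on, so find_field() expects keys to be requested in
+    // document order. A minimal sketch, assuming an ondemand::parser `p` and a
+    // padded_string `json` holding {"a":1,"b":2}:
+    //   auto doc = p.iterate(json);
+    //   uint64_t a = doc.find_field("a").get_uint64(); // earlier key first
+    //   uint64_t b = doc.find_field("b").get_uint64(); // later key second
+    // Requesting "b" before "a" with find_field() would miss "a", because this scan only
+    // moves forward; find_field_unordered_raw() further below removes that restriction.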
+ if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. 
When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. 
+ if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. + } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. 
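+  // Illustrative note (not from upstream simdjson): find_field_unordered_raw() is what
+  // object::find_field_unordered() and operator[] build on, which is why out-of-order key
+  // access works at the cost of an occasional wrap-around rescan. A minimal sketch,
+  // assuming an ondemand::parser `p` and a padded_string `json` holding {"a":1,"b":2}:
+  //   auto doc = p.iterate(json);
+  //   uint64_t b = doc["b"].get_uint64(); // forward scan finds "b"
+  //   uint64_t a = doc["a"].get_uint64(); // wraps back to the start of the object for "a"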
+ return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
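+      // Illustrative note (not from upstream simdjson): as the comment above suggests, trailing
+      // garbage such as `[1, 2] foo]` is not always rejected here, so callers can verify full
+      // consumption themselves. A minimal sketch, assuming an ondemand::parser `p` and a
+      // padded_string `json`:
+      //   auto doc = p.iterate(json);
+      //   for (auto element : doc.get_array()) { /* consume each element */ }
+      //   if (!doc.at_end()) { /* content remained after the root array */ }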
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
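+  // Illustrative note (not from upstream simdjson): the buffer size above works out as 1074
+  // bytes for the maximum number of significant fractional digits of a double, plus 8 bytes
+  // for the remaining characters of the worst-case literal, "-0." and "e-308" (3 + 5 = 8),
+  // with the trailing bytes reserved for null termination.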
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
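+  // Illustrative note (not from upstream simdjson): UINT64_MAX is 18446744073709551615,
+  // exactly 20 digits, which is where the 20-byte bound used by these root integer parsers
+  // comes from; longer tokens cannot be valid unsigned 64-bit integers.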
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
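+// Illustrative note (not from upstream simdjson): is_open() below compares the shared
+// json_iterator's depth with this value's depth. Once a container has been fully consumed,
+// end_container() ascends to depth()-1, so is_open() turns false and the field/element
+// loops above can tell that they have moved past the end of the container.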
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
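+ // delta == 1 means the cursor sits just past the opening token (the first element or
+ // first key), while delta == 2 means the container was empty and the cursor has
+ // already stepped past the closing token as well.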
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace ppc64 +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(ppc64::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +/* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ +/* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ +/* begin file simdjson/ppc64/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/ppc64/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#undef SIMDJSON_SKIP_BACKSLASH_SHORT_CIRCUIT +/* undefining SIMDJSON_IMPLEMENTATION from "ppc64" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/ppc64/end.h */ + +#endif // SIMDJSON_PPC64_ONDEMAND_H +/* end file simdjson/ppc64/ondemand.h */ +#elif SIMDJSON_BUILTIN_IMPLEMENTATION_IS(westmere) +/* including simdjson/westmere/ondemand.h: #include "simdjson/westmere/ondemand.h" */ +/* begin file simdjson/westmere/ondemand.h */ +#ifndef SIMDJSON_WESTMERE_ONDEMAND_H +#define SIMDJSON_WESTMERE_ONDEMAND_H + +/* including simdjson/westmere/begin.h: #include "simdjson/westmere/begin.h" */ +/* begin file simdjson/westmere/begin.h */ +/* defining SIMDJSON_IMPLEMENTATION to "westmere" */ +#define SIMDJSON_IMPLEMENTATION westmere +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). + */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. 
However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_TARGET_REGION("sse4.2,pclmul,popcnt") +#endif + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
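+ // _BitScanReverse64 reports the bit index of the highest set bit, so 63 - index is
+ // the number of leading zeros; when the input is zero the intrinsic returns false
+ // and we fall back to 64 (every bit of the word is a leading zero).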
+ if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/bitmask.h: #include "simdjson/westmere/bitmask.h" */ +/* begin file simdjson/westmere/bitmask.h */ +#ifndef SIMDJSON_WESTMERE_BITMASK_H +#define SIMDJSON_WESTMERE_BITMASK_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// +// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered. +// +// For example, prefix_xor(00100100) == 00011100 +// +simdjson_inline uint64_t prefix_xor(const uint64_t bitmask) { + // There should be no such thing with a processing supporting avx2 + // but not clmul. + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0); + return _mm_cvtsi128_si64(result); +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMASK_H +/* end file simdjson/westmere/bitmask.h */ +/* including simdjson/westmere/numberparsing_defs.h: #include "simdjson/westmere/numberparsing_defs.h" */ +/* begin file simdjson/westmere/numberparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +#define SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H + +/* including simdjson/westmere/base.h: #include "simdjson/westmere/base.h" */ +/* begin file simdjson/westmere/base.h */ +#ifndef SIMDJSON_WESTMERE_BASE_H +#define SIMDJSON_WESTMERE_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +// The constructor may be executed on any host, so we take care not to use SIMDJSON_TARGET_WESTMERE +namespace simdjson { +/** + * Implementation for Westmere (Intel SSE4.2). 
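+ * (This kernel also relies on PCLMULQDQ for prefix_xor and on POPCNT for count_ones.)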
+ */ +namespace westmere { + +class implementation; + +namespace { +namespace simd { + +template struct simd8; +template struct simd8x64; + +} // namespace simd +} // unnamed namespace + +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BASE_H +/* end file simdjson/westmere/base.h */ +/* including simdjson/westmere/intrinsics.h: #include "simdjson/westmere/intrinsics.h" */ +/* begin file simdjson/westmere/intrinsics.h */ +#ifndef SIMDJSON_WESTMERE_INTRINSICS_H +#define SIMDJSON_WESTMERE_INTRINSICS_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if SIMDJSON_VISUAL_STUDIO +// under clang within visual studio, this will include +#include // visual studio or clang +#else +#include // elsewhere +#endif // SIMDJSON_VISUAL_STUDIO + + +#if SIMDJSON_CLANG_VISUAL_STUDIO +/** + * You are not supposed, normally, to include these + * headers directly. Instead you should either include intrin.h + * or x86intrin.h. However, when compiling with clang + * under Windows (i.e., when _MSC_VER is set), these headers + * only get included *if* the corresponding features are detected + * from macros: + */ +#include // for _mm_alignr_epi8 +#include // for _mm_clmulepi64_si128 +#endif + +static_assert(sizeof(__m128i) <= simdjson::SIMDJSON_PADDING, "insufficient padding for westmere"); + +#endif // SIMDJSON_WESTMERE_INTRINSICS_H +/* end file simdjson/westmere/intrinsics.h */ + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/numberparsing_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace numberparsing { + +/** @private */ +static simdjson_inline uint32_t parse_eight_digits_unrolled(const uint8_t *chars) { + // this actually computes *16* values so we are being wasteful. 
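+ // SWAR outline of the steps below:
+ //   1. subtract '0' so every byte holds its digit value 0..9;
+ //   2. _mm_maddubs_epi16 folds adjacent digit pairs into 16-bit lanes (d0*10 + d1);
+ //   3. _mm_madd_epi16 folds adjacent pairs into 32-bit four-digit groups (p0*100 + p1);
+ //   4. after packing back to 16-bit lanes, a final _mm_madd_epi16 merges two groups
+ //      into the eight-digit result (g0*10000 + g1), extracted with _mm_cvtsi128_si32.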
+ const __m128i ascii0 = _mm_set1_epi8('0'); + const __m128i mul_1_10 = + _mm_setr_epi8(10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1, 10, 1); + const __m128i mul_1_100 = _mm_setr_epi16(100, 1, 100, 1, 100, 1, 100, 1); + const __m128i mul_1_10000 = + _mm_setr_epi16(10000, 1, 10000, 1, 10000, 1, 10000, 1); + const __m128i input = _mm_sub_epi8( + _mm_loadu_si128(reinterpret_cast(chars)), ascii0); + const __m128i t1 = _mm_maddubs_epi16(input, mul_1_10); + const __m128i t2 = _mm_madd_epi16(t1, mul_1_100); + const __m128i t3 = _mm_packus_epi32(t2, t2); + const __m128i t4 = _mm_madd_epi16(t3, mul_1_10000); + return _mm_cvtsi128_si32( + t4); // only captures the sum of the first 8 digits, drop the rest +} + +/** @private */ +simdjson_inline internal::value128 full_multiplication(uint64_t value1, uint64_t value2) { + internal::value128 answer; +#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS +#ifdef _M_ARM64 + // ARM64 has native support for 64-bit multiplications, no need to emultate + answer.high = __umulh(value1, value2); + answer.low = value1 * value2; +#else + answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 +#endif // _M_ARM64 +#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS + __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; + answer.low = uint64_t(r); + answer.high = uint64_t(r >> 64); +#endif + return answer; +} + +} // namespace numberparsing +} // namespace westmere +} // namespace simdjson + +#define SIMDJSON_SWAR_NUMBER_PARSING 1 + +#endif // SIMDJSON_WESTMERE_NUMBERPARSING_DEFS_H +/* end file simdjson/westmere/numberparsing_defs.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ 
other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return *static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. 
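+ // Worked example: with mask = 0x0005 (bits 0 and 2 set) the bytes at positions 0
+ // and 2 are dropped, so output begins with the original bytes 1, 3, 4, ... and only
+ // the first 16 - count_ones(mask) = 14 bytes of the result are meaningful.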
+ template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. + __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, 
*this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ +/* including simdjson/westmere/stringparsing_defs.h: #include "simdjson/westmere/stringparsing_defs.h" */ +/* begin file simdjson/westmere/stringparsing_defs.h */ +#ifndef SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +#define SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H + +/* including simdjson/westmere/bitmanipulation.h: #include "simdjson/westmere/bitmanipulation.h" */ +/* begin file simdjson/westmere/bitmanipulation.h */ +#ifndef SIMDJSON_WESTMERE_BITMANIPULATION_H +#define SIMDJSON_WESTMERE_BITMANIPULATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include 
"simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/intrinsics.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { + +// We sometimes call trailing_zero on inputs that are zero, +// but the algorithms do not end up using the returned value. +// Sadly, sanitizers are not smart enough to figure it out. +SIMDJSON_NO_SANITIZE_UNDEFINED +// This function can be used safely even if not all bytes have been +// initialized. +// See issue https://github.com/simdjson/simdjson/issues/1965 +SIMDJSON_NO_SANITIZE_MEMORY +simdjson_inline int trailing_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward64(&ret, input_num); + return (int)ret; +#else // SIMDJSON_REGULAR_VISUAL_STUDIO + return __builtin_ctzll(input_num); +#endif // SIMDJSON_REGULAR_VISUAL_STUDIO +} + +/* result might be undefined when input_num is zero */ +simdjson_inline uint64_t clear_lowest_bit(uint64_t input_num) { + return input_num & (input_num-1); +} + +/* result might be undefined when input_num is zero */ +simdjson_inline int leading_zeroes(uint64_t input_num) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). + if (_BitScanReverse64(&leading_zero, input_num)) + return (int)(63 - leading_zero); + else + return 64; +#else + return __builtin_clzll(input_num); +#endif// SIMDJSON_REGULAR_VISUAL_STUDIO +} + +#if SIMDJSON_REGULAR_VISUAL_STUDIO +simdjson_inline unsigned __int64 count_ones(uint64_t input_num) { + // note: we do not support legacy 32-bit Windows in this kernel + return __popcnt64(input_num);// Visual Studio wants two underscores +} +#else +simdjson_inline long long int count_ones(uint64_t input_num) { + return _popcnt64(input_num); +} +#endif + +simdjson_inline bool add_overflow(uint64_t value1, uint64_t value2, + uint64_t *result) { +#if SIMDJSON_REGULAR_VISUAL_STUDIO + return _addcarry_u64(0, value1, value2, + reinterpret_cast(result)); +#else + return __builtin_uaddll_overflow(value1, value2, + reinterpret_cast(result)); +#endif +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_BITMANIPULATION_H +/* end file simdjson/westmere/bitmanipulation.h */ +/* including simdjson/westmere/simd.h: #include "simdjson/westmere/simd.h" */ +/* begin file simdjson/westmere/simd.h */ +#ifndef SIMDJSON_WESTMERE_SIMD_H +#define SIMDJSON_WESTMERE_SIMD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/bitmanipulation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/simdprune_tables.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace { +namespace simd { + + template + struct base { + __m128i value; + + // Zero constructor + simdjson_inline base() : value{__m128i()} {} + + // Conversion from SIMD register + simdjson_inline base(const __m128i _value) : value(_value) {} + + // Conversion to SIMD register + simdjson_inline operator const __m128i&() const { 
return this->value; } + simdjson_inline operator __m128i&() { return this->value; } + + // Bit operations + simdjson_inline Child operator|(const Child other) const { return _mm_or_si128(*this, other); } + simdjson_inline Child operator&(const Child other) const { return _mm_and_si128(*this, other); } + simdjson_inline Child operator^(const Child other) const { return _mm_xor_si128(*this, other); } + simdjson_inline Child bit_andnot(const Child other) const { return _mm_andnot_si128(other, *this); } + simdjson_inline Child& operator|=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast | other; return *this_cast; } + simdjson_inline Child& operator&=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast & other; return *this_cast; } + simdjson_inline Child& operator^=(const Child other) { auto this_cast = static_cast(this); *this_cast = *this_cast ^ other; return *this_cast; } + }; + + template> + struct base8: base> { + typedef uint16_t bitmask_t; + typedef uint32_t bitmask2_t; + + simdjson_inline base8() : base>() {} + simdjson_inline base8(const __m128i _value) : base>(_value) {} + + friend simdjson_inline Mask operator==(const simd8 lhs, const simd8 rhs) { return _mm_cmpeq_epi8(lhs, rhs); } + + static const int SIZE = sizeof(base>::value); + + template + simdjson_inline simd8 prev(const simd8 prev_chunk) const { + return _mm_alignr_epi8(*this, prev_chunk, 16 - N); + } + }; + + // SIMD byte mask type (returned by things like eq and gt) + template<> + struct simd8: base8 { + static simdjson_inline simd8 splat(bool _value) { return _mm_set1_epi8(uint8_t(-(!!_value))); } + + simdjson_inline simd8() : base8() {} + simdjson_inline simd8(const __m128i _value) : base8(_value) {} + // Splat constructor + simdjson_inline simd8(bool _value) : base8(splat(_value)) {} + + simdjson_inline int to_bitmask() const { return _mm_movemask_epi8(*this); } + simdjson_inline bool any() const { return !_mm_testz_si128(*this, *this); } + simdjson_inline simd8 operator~() const { return *this ^ true; } + }; + + template + struct base8_numeric: base8 { + static simdjson_inline simd8 splat(T _value) { return _mm_set1_epi8(_value); } + static simdjson_inline simd8 zero() { return _mm_setzero_si128(); } + static simdjson_inline simd8 load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast(values)); + } + // Repeat 16 values as many times as necessary (usually for lookup tables) + static simdjson_inline simd8 repeat_16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + simdjson_inline base8_numeric() : base8() {} + simdjson_inline base8_numeric(const __m128i _value) : base8(_value) {} + + // Store to array + simdjson_inline void store(T dst[16]) const { return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); } + + // Override to distinguish from bool version + simdjson_inline simd8 operator~() const { return *this ^ 0xFFu; } + + // Addition/subtraction are the same for signed and unsigned + simdjson_inline simd8 operator+(const simd8 other) const { return _mm_add_epi8(*this, other); } + simdjson_inline simd8 operator-(const simd8 other) const { return _mm_sub_epi8(*this, other); } + simdjson_inline simd8& operator+=(const simd8 other) { *this = *this + other; return *static_cast*>(this); } + simdjson_inline simd8& operator-=(const simd8 other) { *this = *this - other; return 
*static_cast*>(this); } + + // Perform a lookup assuming the value is between 0 and 16 (undefined behavior for out of range values) + template + simdjson_inline simd8 lookup_16(simd8 lookup_table) const { + return _mm_shuffle_epi8(lookup_table, *this); + } + + // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). + // Passing a 0 value for mask would be equivalent to writing out every byte to output. + // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes + // get written. + // Design consideration: it seems like a function with the + // signature simd8 compress(uint32_t mask) would be + // sensible, but the AVX ISA makes this kind of approach difficult. + template + simdjson_inline void compress(uint16_t mask, L * output) const { + using internal::thintable_epi8; + using internal::BitsSetTable256mul2; + using internal::pshufb_combine_table; + // this particular implementation was inspired by work done by @animetosho + // we do it in two steps, first 8 bytes and then second 8 bytes + uint8_t mask1 = uint8_t(mask); // least significant 8 bits + uint8_t mask2 = uint8_t(mask >> 8); // most significant 8 bits + // next line just loads the 64-bit values thintable_epi8[mask1] and + // thintable_epi8[mask2] into a 128-bit register, using only + // two instructions on most compilers. + __m128i shufmask = _mm_set_epi64x(thintable_epi8[mask2], thintable_epi8[mask1]); + // we increment by 0x08 the second half of the mask + shufmask = + _mm_add_epi8(shufmask, _mm_set_epi32(0x08080808, 0x08080808, 0, 0)); + // this is the version "nearly pruned" + __m128i pruned = _mm_shuffle_epi8(*this, shufmask); + // we still need to put the two halves together. + // we compute the popcount of the first half: + int pop1 = BitsSetTable256mul2[mask1]; + // then load the corresponding mask, what it does is to write + // only the first pop1 bytes from the first 8 bytes, and then + // it fills in with the bytes from the second 8 bytes + some filling + // at the end. 
+ __m128i compactmask = + _mm_loadu_si128(reinterpret_cast(pshufb_combine_table + pop1 * 8)); + __m128i answer = _mm_shuffle_epi8(pruned, compactmask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(output), answer); + } + + template + simdjson_inline simd8 lookup_16( + L replace0, L replace1, L replace2, L replace3, + L replace4, L replace5, L replace6, L replace7, + L replace8, L replace9, L replace10, L replace11, + L replace12, L replace13, L replace14, L replace15) const { + return lookup_16(simd8::repeat_16( + replace0, replace1, replace2, replace3, + replace4, replace5, replace6, replace7, + replace8, replace9, replace10, replace11, + replace12, replace13, replace14, replace15 + )); + } + }; + + // Signed bytes + template<> + struct simd8 : base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(int8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const int8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5, int8_t v6, int8_t v7, + int8_t v8, int8_t v9, int8_t v10, int8_t v11, int8_t v12, int8_t v13, int8_t v14, int8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Order-sensitive comparisons + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epi8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epi8(*this, other); } + simdjson_inline simd8 operator>(const simd8 other) const { return _mm_cmpgt_epi8(*this, other); } + simdjson_inline simd8 operator<(const simd8 other) const { return _mm_cmpgt_epi8(other, *this); } + }; + + // Unsigned bytes + template<> + struct simd8: base8_numeric { + simdjson_inline simd8() : base8_numeric() {} + simdjson_inline simd8(const __m128i _value) : base8_numeric(_value) {} + // Splat constructor + simdjson_inline simd8(uint8_t _value) : simd8(splat(_value)) {} + // Array constructor + simdjson_inline simd8(const uint8_t* values) : simd8(load(values)) {} + // Member-by-member initialization + simdjson_inline simd8( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) : simd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + // Repeat 16 values as many times as necessary (usually for lookup tables) + simdjson_inline static simd8 repeat_16( + uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5, uint8_t v6, uint8_t v7, + uint8_t v8, uint8_t v9, uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15 + ) { + return simd8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + // Saturated math + simdjson_inline simd8 saturating_add(const simd8 other) const { return _mm_adds_epu8(*this, other); } + 
simdjson_inline simd8 saturating_sub(const simd8 other) const { return _mm_subs_epu8(*this, other); } + + // Order-specific operations + simdjson_inline simd8 max_val(const simd8 other) const { return _mm_max_epu8(*this, other); } + simdjson_inline simd8 min_val(const simd8 other) const { return _mm_min_epu8(*this, other); } + // Same as >, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 gt_bits(const simd8 other) const { return this->saturating_sub(other); } + // Same as <, but only guarantees true is nonzero (< guarantees true = -1) + simdjson_inline simd8 lt_bits(const simd8 other) const { return other.saturating_sub(*this); } + simdjson_inline simd8 operator<=(const simd8 other) const { return other.max_val(*this) == other; } + simdjson_inline simd8 operator>=(const simd8 other) const { return other.min_val(*this) == other; } + simdjson_inline simd8 operator>(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + simdjson_inline simd8 operator<(const simd8 other) const { return this->gt_bits(other).any_bits_set(); } + + // Bit-specific operations + simdjson_inline simd8 bits_not_set() const { return *this == uint8_t(0); } + simdjson_inline simd8 bits_not_set(simd8 bits) const { return (*this & bits).bits_not_set(); } + simdjson_inline simd8 any_bits_set() const { return ~this->bits_not_set(); } + simdjson_inline simd8 any_bits_set(simd8 bits) const { return ~this->bits_not_set(bits); } + simdjson_inline bool is_ascii() const { return _mm_movemask_epi8(*this) == 0; } + simdjson_inline bool bits_not_set_anywhere() const { return _mm_testz_si128(*this, *this); } + simdjson_inline bool any_bits_set_anywhere() const { return !bits_not_set_anywhere(); } + simdjson_inline bool bits_not_set_anywhere(simd8 bits) const { return _mm_testz_si128(*this, bits); } + simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return !bits_not_set_anywhere(bits); } + template + simdjson_inline simd8 shr() const { return simd8(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N); } + template + simdjson_inline simd8 shl() const { return simd8(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N); } + // Get one of the bits and make a bitmask out of it. + // e.g. 
value.get_bit<7>() gets the high bit + template + simdjson_inline int get_bit() const { return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); } + }; + + template + struct simd8x64 { + static constexpr int NUM_CHUNKS = 64 / sizeof(simd8); + static_assert(NUM_CHUNKS == 4, "Westmere kernel should use four registers per 64-byte block."); + const simd8 chunks[NUM_CHUNKS]; + + simd8x64(const simd8x64& o) = delete; // no copy allowed + simd8x64& operator=(const simd8& other) = delete; // no assignment allowed + simd8x64() = delete; // no default constructor allowed + + simdjson_inline simd8x64(const simd8 chunk0, const simd8 chunk1, const simd8 chunk2, const simd8 chunk3) : chunks{chunk0, chunk1, chunk2, chunk3} {} + simdjson_inline simd8x64(const T ptr[64]) : chunks{simd8::load(ptr), simd8::load(ptr+16), simd8::load(ptr+32), simd8::load(ptr+48)} {} + + simdjson_inline void store(T ptr[64]) const { + this->chunks[0].store(ptr+sizeof(simd8)*0); + this->chunks[1].store(ptr+sizeof(simd8)*1); + this->chunks[2].store(ptr+sizeof(simd8)*2); + this->chunks[3].store(ptr+sizeof(simd8)*3); + } + + simdjson_inline simd8 reduce_or() const { + return (this->chunks[0] | this->chunks[1]) | (this->chunks[2] | this->chunks[3]); + } + + simdjson_inline uint64_t compress(uint64_t mask, T * output) const { + this->chunks[0].compress(uint16_t(mask), output); + this->chunks[1].compress(uint16_t(mask >> 16), output + 16 - count_ones(mask & 0xFFFF)); + this->chunks[2].compress(uint16_t(mask >> 32), output + 32 - count_ones(mask & 0xFFFFFFFF)); + this->chunks[3].compress(uint16_t(mask >> 48), output + 48 - count_ones(mask & 0xFFFFFFFFFFFF)); + return 64 - count_ones(mask); + } + + simdjson_inline uint64_t to_bitmask() const { + uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() ); + uint64_t r1 = this->chunks[1].to_bitmask() ; + uint64_t r2 = this->chunks[2].to_bitmask() ; + uint64_t r3 = this->chunks[3].to_bitmask() ; + return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48); + } + + simdjson_inline uint64_t eq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] == mask, + this->chunks[1] == mask, + this->chunks[2] == mask, + this->chunks[3] == mask + ).to_bitmask(); + } + + simdjson_inline uint64_t eq(const simd8x64 &other) const { + return simd8x64( + this->chunks[0] == other.chunks[0], + this->chunks[1] == other.chunks[1], + this->chunks[2] == other.chunks[2], + this->chunks[3] == other.chunks[3] + ).to_bitmask(); + } + + simdjson_inline uint64_t lteq(const T m) const { + const simd8 mask = simd8::splat(m); + return simd8x64( + this->chunks[0] <= mask, + this->chunks[1] <= mask, + this->chunks[2] <= mask, + this->chunks[3] <= mask + ).to_bitmask(); + } + }; // struct simd8x64 + +} // namespace simd +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_SIMD_INPUT_H +/* end file simdjson/westmere/simd.h */ + +namespace simdjson { +namespace westmere { +namespace { + +using namespace simd; + +// Holds backslashes and quotes locations. 
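+// copy_and_find() processes 32 input bytes at a time: bit i of bs_bits is set when
+// src[i] is a backslash and bit i of quote_bits is set when src[i] is a quote, and
+// has_quote_first() reports whether some quote appears before the first backslash.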
+struct backslash_and_quote { +public: + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static backslash_and_quote copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_quote_first() { return ((bs_bits - 1) & quote_bits) != 0; } + simdjson_inline bool has_backslash() { return bs_bits != 0; } + simdjson_inline int quote_index() { return trailing_zeroes(quote_bits); } + simdjson_inline int backslash_index() { return trailing_zeroes(bs_bits); } + + uint32_t bs_bits; + uint32_t quote_bits; +}; // struct backslash_and_quote + +simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { + // this can read up to 31 bytes beyond the buffer size, but we require + // SIMDJSON_PADDING of padding + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "backslash and quote finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v0(src); + simd8 v1(src + 16); + v0.store(dst); + v1.store(dst + 16); + uint64_t bs_and_quote = simd8x64(v0 == '\\', v1 == '\\', v0 == '"', v1 == '"').to_bitmask(); + return { + uint32_t(bs_and_quote), // bs_bits + uint32_t(bs_and_quote >> 32) // quote_bits + }; +} + +} // unnamed namespace +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_WESTMERE_STRINGPARSING_DEFS_H +/* end file simdjson/westmere/stringparsing_defs.h */ +/* end file simdjson/westmere/begin.h */ +/* including simdjson/generic/ondemand/amalgamated.h for westmere: #include "simdjson/generic/ondemand/amalgamated.h" */ +/* begin file simdjson/generic/ondemand/amalgamated.h for westmere */ +#if defined(SIMDJSON_CONDITIONAL_INCLUDE) && !defined(SIMDJSON_GENERIC_ONDEMAND_DEPENDENCIES_H) +#error simdjson/generic/ondemand/dependencies.h must be included before simdjson/generic/ondemand/amalgamated.h! +#endif + +// Stuff other things depend on +/* including simdjson/generic/ondemand/base.h for westmere: #include "simdjson/generic/ondemand/base.h" */ +/* begin file simdjson/generic/ondemand/base.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_BASE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_BASE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +/** + * A fast, simple, DOM-like interface that parses JSON as you use it. + * + * Designed for maximum speed and a lower memory profile. + */ +namespace ondemand { + +/** Represents the depth of a JSON value (number of nested arrays/objects). 
*/ +using depth_t = int32_t; + +/** @copydoc simdjson::westmere::number_type */ +using number_type = simdjson::westmere::number_type; + +/** @private Position in the JSON buffer indexes */ +using token_position = const uint32_t *; + +class array; +class array_iterator; +class document; +class document_reference; +class document_stream; +class field; +class json_iterator; +enum class json_type; +struct number; +class object; +class object_iterator; +class parser; +class raw_json_string; +class token_iterator; +class value; +class value_iterator; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_BASE_H +/* end file simdjson/generic/ondemand/base.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ +/* begin file simdjson/generic/ondemand/value_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through a single JSON value at a particular depth. + * + * Does not keep track of the type of value: provides methods for objects, arrays and scalars and expects + * the caller to call the right ones. + * + * @private This is not intended for external use. + */ +class value_iterator { +protected: + /** The underlying JSON iterator */ + json_iterator *_json_iter{}; + /** The depth of this value */ + depth_t _depth{}; + /** + * The starting token index for this value + */ + token_position _start_position{}; + +public: + simdjson_inline value_iterator() noexcept = default; + + /** + * Denote that we're starting a document. + */ + simdjson_inline void start_document() noexcept; + + /** + * Skips a non-iterated or partially-iterated JSON value, whether it is a scalar, array or object. + * + * Optimized for scalars. + */ + simdjson_warn_unused simdjson_inline error_code skip_child() noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is at the start of the value + */ + simdjson_inline bool at_start() const noexcept; + + /** + * Tell whether the value is open--if the value has not been used, or the array/object is still open. + */ + simdjson_inline bool is_open() const noexcept; + + /** + * Tell whether the value is at an object's first field (just after the {). + */ + simdjson_inline bool at_first_field() const noexcept; + + /** + * Abandon all iteration. + */ + simdjson_inline void abandon() noexcept; + + /** + * Get the child value as a value_iterator. + */ + simdjson_inline value_iterator child_value() const noexcept; + + /** + * Get the depth of this value. + */ + simdjson_inline int32_t depth() const noexcept; + + /** + * Get the JSON type of this value. + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". 
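+ * (type() only inspects the first character of the value and never advances the
+ * iterator.)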
+ */ + simdjson_inline simdjson_result type() const noexcept; + + /** + * @addtogroup object Object iteration + * + * Methods to iterate and find object fields. These methods generally *assume* the value is + * actually an object; the caller is responsible for keeping track of that fact. + * + * @{ + */ + + /** + * Start an object iteration. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + */ + simdjson_warn_unused simdjson_inline simdjson_result start_object() noexcept; + /** + * Start an object iteration from the root. + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_object() noexcept; + /** + * Checks whether an object could be started from the root. May be called by start_root_object. + * + * @returns SUCCESS if it is possible to safely start an object from the root (document level). + * @error INCORRECT_TYPE if there is no opening { + * @error TAPE_ERROR if there is no matching } at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_object() noexcept; + /** + * Start an object iteration after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; + /** + * Start an object iteration from the root, after the user has already checked and moved past the {. + * + * Does not move the iterator unless the object is empty ({}). + * + * @returns Whether the object had any fields (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; + + /** + * Moves to the next field in an object. + * + * Looks for , and }. If } is found, the object is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return whether there is another field in the object. + * @error TAPE_ERROR If there is a comma missing between fields. + * @error TAPE_ERROR If there is a comma, but not enough tokens remaining to have a key, :, and value. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_field() noexcept; + + /** + * Get the current field's key. + */ + simdjson_warn_unused simdjson_inline simdjson_result field_key() noexcept; + + /** + * Pass the : in the field and move to its value. + */ + simdjson_warn_unused simdjson_inline error_code field_value() noexcept; + + /** + * Find the next field with the given key. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). 
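+ *
+ * As an illustrative, user-facing sketch of the same caveat (the JSON text and the names
+ * `parser` and `doc` are assumptions for the example), a lookup by the unescaped key fails
+ * when the stored key uses an escape sequence:
+ *
+ * ```c++
+ * simdjson::ondemand::parser parser;
+ * auto doc = parser.iterate(R"( { "\u0061": 1 } )"_padded);
+ * int64_t a;
+ * auto error = doc["a"].get_int64().get(a); // error == NO_SUCH_FIELD: "\u0061" is not unescaped before matching
+ * ```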
+ */ + simdjson_warn_unused simdjson_inline error_code find_field(const std::string_view key) noexcept; + + /** + * Find the next field with the given key, *without* unescaping. This assumes object order: it + * will not find the field if it was already passed when looking for some *other* field. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_raw(const std::string_view key) noexcept; + + /** + * Find the field with the given key without regard to order, and *without* unescaping. + * + * This is an unordered object lookup: if the field is not found initially, it will cycle around and scan from the beginning. + * + * Assumes you have called next_field() or otherwise matched the previous value. + * + * This means the iterator must be sitting at the next key: + * + * ``` + * { "a": 1, "b": 2 } + * ^ + * ``` + * + * Key is *raw JSON,* meaning it will be matched against the verbatim JSON without attempting to + * unescape it. This works well for typical ASCII and UTF-8 keys (almost all of them), but may + * fail to match some keys with escapes (\u, \n, etc.). + */ + simdjson_warn_unused simdjson_inline simdjson_result find_field_unordered_raw(const std::string_view key) noexcept; + + /** @} */ + + /** + * @addtogroup array Array iteration + * Methods to iterate over array elements. These methods generally *assume* the value is actually + * an object; the caller is responsible for keeping track of that fact. + * @{ + */ + + /** + * Check for an opening [ and start an array iteration. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + */ + simdjson_warn_unused simdjson_inline simdjson_result start_array() noexcept; + /** + * Check for an opening [ and start an array iteration while at the root. + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline simdjson_result start_root_array() noexcept; + /** + * Checks whether an array could be started from the root. May be called by start_root_array. + * + * @returns SUCCESS if it is possible to safely start an array from the root (document level). + * @error INCORRECT_TYPE If there is no [. + * @error TAPE_ERROR if there is no matching ] at end of document + */ + simdjson_warn_unused simdjson_inline error_code check_root_array() noexcept; + /** + * Start an array iteration, after the user has already checked and moved past the [. + * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; + /** + * Start an array iteration from the root, after the user has already checked and moved past the [. 
+ * + * Does not move the iterator unless the array is empty ([]). + * + * @returns Whether the array had any elements (returns false for empty). + * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* + * array or object is incomplete). + */ + simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; + + /** + * Moves to the next element in an array. + * + * Looks for , and ]. If ] is found, the array is finished and the iterator advances past it. + * Otherwise, it advances to the next value. + * + * @return Whether there is another element in the array. + * @error TAPE_ERROR If there is a comma missing between elements. + */ + simdjson_warn_unused simdjson_inline simdjson_result has_next_element() noexcept; + + /** + * Get a child value iterator. + */ + simdjson_warn_unused simdjson_inline value_iterator child() const noexcept; + + /** @} */ + + /** + * @defgroup scalar Scalar values + * @addtogroup scalar + * @{ + */ + + simdjson_warn_unused simdjson_inline simdjson_result get_string(bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_bool() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_null() noexcept; + simdjson_warn_unused simdjson_inline bool is_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_integer() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + simdjson_warn_unused simdjson_inline simdjson_result get_root_string(bool check_trailing, bool allow_replacement) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_wobbly_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_raw_json_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_uint64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_int64_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_double_in_string(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_bool(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline bool is_root_negative() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_integer(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result 
get_root_number_type(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result get_root_number(bool check_trailing) noexcept; + simdjson_warn_unused simdjson_inline simdjson_result is_root_null(bool check_trailing) noexcept; + + simdjson_inline error_code error() const noexcept; + simdjson_inline uint8_t *&string_buf_loc() noexcept; + simdjson_inline const json_iterator &json_iter() const noexcept; + simdjson_inline json_iterator &json_iter() noexcept; + + simdjson_inline void assert_is_valid() const noexcept; + simdjson_inline bool is_valid() const noexcept; + + /** @} */ +protected: + /** + * Restarts an array iteration. + * @returns Whether the array has any elements (returns false for empty). + */ + simdjson_inline simdjson_result reset_array() noexcept; + /** + * Restarts an object iteration. + * @returns Whether the object has any fields (returns false for empty). + */ + simdjson_inline simdjson_result reset_object() noexcept; + /** + * move_at_start(): moves us so that we are pointing at the beginning of + * the container. It updates the index so that at_start() is true and it + * syncs the depth. The user can then create a new container instance. + * + * Usage: used with value::count_elements(). + **/ + simdjson_inline void move_at_start() noexcept; + + /** + * move_at_container_start(): moves us so that we are pointing at the beginning of + * the container so that assert_at_container_start() passes. + * + * Usage: used with reset_array() and reset_object(). + **/ + simdjson_inline void move_at_container_start() noexcept; + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + simdjson_inline value_iterator(json_iterator *json_iter, depth_t depth, token_position start_index) noexcept; + + simdjson_inline simdjson_result parse_null(const uint8_t *json) const noexcept; + simdjson_inline simdjson_result parse_bool(const uint8_t *json) const noexcept; + simdjson_inline const uint8_t *peek_start() const noexcept; + simdjson_inline uint32_t peek_start_length() const noexcept; + + /** + * The general idea of the advance_... methods and the peek_* methods + * is that you first peek and check that you have desired type. If you do, + * and only if you do, then you advance. + * + * We used to unconditionally advance. But this made reasoning about our + * current state difficult. + * Suppose you always advance. Look at the 'value' matching the key + * "shadowable" in the following example... + * + * ({"globals":{"a":{"shadowable":[}}}}) + * + * If the user thinks it is a Boolean and asks for it, then we check the '[', + * decide it is not a Boolean, but still move into the next character ('}'). Now + * we are left pointing at '}' right after a '['. And we have not yet reported + * an error, only that we do not have a Boolean. + * + * If, instead, you just stand your ground until it is content that you know, then + * you will only even move beyond the '[' if the user tells you that you have an + * array. So you will be at the '}' character inside the array and, hopefully, you + * will then catch the error because an array cannot start with '}', but the code + * processing Boolean values does not know this. + * + * So the contract is: first call 'peek_...' and then call 'advance_...' only + * if you have determined that it is a type you can handle. + * + * Unfortunately, it makes the code more verbose, longer and maybe more error prone. 
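+ *
+ * As an illustrative sketch (not necessarily the exact library code), a scalar getter that
+ * follows this contract peeks first and advances only once it knows it can handle the token:
+ *
+ * ```c++
+ * simdjson_inline simdjson_result<bool> value_iterator::get_bool() noexcept {
+ *   auto result = parse_bool(peek_non_root_scalar("bool")); // look, but do not move
+ *   if (result.error() == SUCCESS) { advance_non_root_scalar("bool"); } // advance only if it is a Boolean
+ *   return result;
+ * }
+ * ```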
+ */ + + simdjson_inline void advance_scalar(const char *type) noexcept; + simdjson_inline void advance_root_scalar(const char *type) noexcept; + simdjson_inline void advance_non_root_scalar(const char *type) noexcept; + + simdjson_inline const uint8_t *peek_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_root_scalar(const char *type) noexcept; + simdjson_inline const uint8_t *peek_non_root_scalar(const char *type) noexcept; + + + simdjson_inline error_code start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept; + simdjson_inline error_code end_container() noexcept; + + /** + * Advance to a place expecting a value (increasing depth). + * + * @return The current token (the one left behind). + * @error TAPE_ERROR If the document ended early. + */ + simdjson_inline simdjson_result advance_to_value() noexcept; + + simdjson_inline error_code incorrect_type_error(const char *message) const noexcept; + simdjson_inline error_code error_unless_more_tokens(uint32_t tokens=1) const noexcept; + + simdjson_inline bool is_at_start() const noexcept; + /** + * is_at_iterator_start() returns true on an array or object after it has just been + * created, whether the instance is empty or not. + * + * Usage: used by array::begin() in debug mode (SIMDJSON_DEVELOPMENT_CHECKS) + */ + simdjson_inline bool is_at_iterator_start() const noexcept; + + /** + * Assuming that we are within an object, this returns true if we + * are pointing at a key. + * + * Usage: the skip_child() method should never be used while we are pointing + * at a key inside an object. + */ + simdjson_inline bool is_at_key() const noexcept; + + inline void assert_at_start() const noexcept; + inline void assert_at_container_start() const noexcept; + inline void assert_at_root() const noexcept; + inline void assert_at_child() const noexcept; + inline void assert_at_next() const noexcept; + inline void assert_at_non_root_start() const noexcept; + + /** Get the starting position of this value */ + simdjson_inline token_position start_position() const noexcept; + + /** @copydoc error_code json_iterator::position() const noexcept; */ + simdjson_inline token_position position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position last_position() const noexcept; + /** @copydoc error_code json_iterator::end_position() const noexcept; */ + simdjson_inline token_position end_position() const noexcept; + /** @copydoc error_code json_iterator::report_error(error_code error, const char *message) noexcept; */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + friend class document; + friend class object; + friend class array; + friend class value; +}; // value_iterator + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::value_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_H +/* end file simdjson/generic/ondemand/value_iterator.h for westmere */ +/* including simdjson/generic/ondemand/value.h for westmere: #include "simdjson/generic/ondemand/value.h" */ +/* begin file 
simdjson/generic/ondemand/value.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * An ephemeral JSON value returned during iteration. It is only valid for as long as you do + * not access more data in the JSON document. + */ +class value { +public: + /** + * Create a new invalid value. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline value() noexcept = default; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) noexcept; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() noexcept; + + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() noexcept; + + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + + /** + * Cast this JSON value (inside string) to a unsigned integer. + * + * @returns A unsigned 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. 
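+ *
+ * A minimal error-code style sketch (the JSON text and the names `parser` and `doc` are
+ * assumptions for the example):
+ *
+ * ```c++
+ * simdjson::ondemand::parser parser;
+ * auto doc = parser.iterate(R"( { "count": 42 } )"_padded);
+ * int64_t count;
+ * auto error = doc["count"].get_int64().get(count);
+ * if (error) { return; }  // missing field or not a 64-bit signed integer
+ * // count == 42
+ * ```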
+ */ + simdjson_inline simdjson_result get_int64() noexcept; + + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * Important: a value should be consumed once. Calling get_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + + + /** + * Cast this JSON value to a "wobbly" string. + * + * The string is may not be a valid UTF-8 string. + * See https://simonsapin.github.io/wtf-8/ + * + * Important: a value should be consumed once. Calling get_wobbly_string() twice on the same value + * is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() noexcept(false); + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. 
+ * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Equivalent to get(). + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); +#endif + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + * + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * Performance hint: You should only call count_elements() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method on the object instance. 
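+ *
+ * A minimal sketch (the JSON text and the name `doc` are assumptions for the example):
+ *
+ * ```c++
+ * auto point = doc["point"];            // e.g. from { "point": { "x": 1, "y": 2 } }
+ * size_t n;
+ * if (point.count_fields().get(n) == simdjson::SUCCESS) {
+ *   // n == 2; the object is rewound, so it can still be iterated afterwards
+ * }
+ * ```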
+ * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
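+ *
+ * Illustrative sketch (exception style, as in the find_field() example above; the JSON text
+ * and the name `parser` are assumptions). operator[] performs the same unordered lookup:
+ *
+ * ```c++
+ * simdjson::ondemand::parser parser;
+ * auto obj = parser.iterate(R"( { "x": 1, "y": 2, "z": 3 } )"_padded);
+ * double z = obj["z"];  // scans forward to "z"
+ * double x = obj["x"];  // not found ahead of the cursor, so the lookup wraps around and still finds "x"
+ * ```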
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @return The type of JSON value (json_type::array, json_type::object, json_type::string, + * json_type::number, json_type::boolean, or json_type::null). + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the value is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the value is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the value is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * Performance note: if you call this function systematically + * before parsing a number, you may have fallen for a performance + * anti-pattern. + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). + * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. 
An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + * + * Performance note: this is designed with performance in mind. When + * calling 'get_number()', you scan the number string only once, determining + * efficiently the type and storing it in an efficient manner. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. However, if this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view is guaranteed to be + * a non-space token. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline std::string_view raw_json_token() noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + simdjson_inline simdjson_result current_location() noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. 
It invalidates all previously accessed arrays, objects and values + * that have not been consumed. + * + * Calling at_pointer() on non-document instances (e.g., arrays and objects) is not + * standardized (by RFC 6901). We provide some experimental support for JSON pointers + * on non-document instances. Yet it is not the case when calling at_pointer on an array + * or an object instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + +protected: + /** + * Create a value. + */ + simdjson_inline value(const value_iterator &iter) noexcept; + + /** + * Skip this value, allowing iteration to continue. + */ + simdjson_inline void skip() noexcept; + + /** + * Start a value at the current position. + * + * (It should already be started; this is just a self-documentation method.) + */ + static simdjson_inline value start(const value_iterator &iter) noexcept; + + /** + * Resume a value. 
+ */ + static simdjson_inline value resume(const value_iterator &iter) noexcept; + + /** + * Get the object, starting or resuming it as necessary + */ + simdjson_inline simdjson_result start_or_resume_object() noexcept; + + // simdjson_inline void log_value(const char *type) const noexcept; + // simdjson_inline void log_error(const char *message) const noexcept; + + value_iterator iter{}; + + friend class document; + friend class array_iterator; + friend class field; + friend class object; + friend struct simdjson_result; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::value &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result get_array() noexcept; + simdjson_inline simdjson_result get_object() noexcept; + + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() noexcept; + + template simdjson_inline error_code get(T &out) noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator westmere::ondemand::array() noexcept(false); + simdjson_inline operator westmere::ondemand::object() noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
+ */ + simdjson_inline simdjson_result find_field(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) noexcept; + + /** + * Get the type of this JSON value. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). 
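+ *
+ * A minimal sketch of dispatching on the type with error codes (the name `val` is an
+ * assumption for the example):
+ *
+ * ```c++
+ * simdjson::ondemand::json_type t;
+ * if (val.type().get(t) == simdjson::SUCCESS &&
+ *     t == simdjson::ondemand::json_type::number) {
+ *   double d;
+ *   if (val.get_double().get(d) == simdjson::SUCCESS) {
+ *     // use d
+ *   }
+ * }
+ * ```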
+ */ + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + + /** @copydoc simdjson_inline std::string_view value::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** @copydoc simdjson_inline simdjson_result current_location() noexcept */ + simdjson_inline simdjson_result current_location() noexcept; + /** @copydoc simdjson_inline int32_t current_depth() const noexcept */ + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_H +/* end file simdjson/generic/ondemand/value.h for westmere */ +/* including simdjson/generic/ondemand/logger.h for westmere: #include "simdjson/generic/ondemand/logger.h" */ +/* begin file simdjson/generic/ondemand/logger.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// Logging should be free unless SIMDJSON_VERBOSE_LOGGING is set. Importantly, it is critical +// that the call to the log functions be side-effect free. Thus, for example, you should not +// create temporary std::string instances. +namespace logger { + +enum class log_level : int32_t { + info = 0, + error = 1 +}; + +#if SIMDJSON_VERBOSE_LOGGING + static constexpr const bool LOG_ENABLED = true; +#else + static constexpr const bool LOG_ENABLED = false; +#endif + +// We do not want these functions to be 'really inlined' since real inlining is +// for performance purposes and if you are using the loggers, you do not care about +// performance (or should not). +static inline void log_headers() noexcept; +// If args are provided, title will be treated as format string +template +static inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +template +static inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... 
args) noexcept; +static inline void log_event(const json_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_value(const json_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail="") noexcept; +static inline void log_start_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const json_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; + +static inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail="") noexcept; +static inline void log_error(const json_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +static inline void log_event(const value_iterator &iter, const char *type, std::string_view detail="", int delta=0, int depth_delta=0) noexcept; +static inline void log_value(const value_iterator &iter, const char *type, std::string_view detail="", int delta=-1, int depth_delta=0) noexcept; +static inline void log_start_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_end_value(const value_iterator &iter, const char *type, int delta=-1, int depth_delta=0) noexcept; +static inline void log_error(const value_iterator &iter, const char *error, const char *detail="", int delta=-1, int depth_delta=0) noexcept; + +} // namespace logger +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_H +/* end file simdjson/generic/ondemand/logger.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator.h for westmere: #include "simdjson/generic/ondemand/token_iterator.h" */ +/* begin file simdjson/generic/ondemand/token_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through JSON tokens (`{` `}` `[` `]` `,` `:` `""` `123` `true` `false` `null`) + * detected by stage 1. + * + * @private This is not intended for external use. + */ +class token_iterator { +public: + /** + * Create a new invalid token_iterator. + * + * Exists so you can declare a variable and later assign to it before use. 
+ */ + simdjson_inline token_iterator() noexcept = default; + simdjson_inline token_iterator(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator &operator=(token_iterator &&other) noexcept = default; + simdjson_inline token_iterator(const token_iterator &other) noexcept = default; + simdjson_inline token_iterator &operator=(const token_iterator &other) noexcept = default; + + /** + * Advance to the next token (returning the current one). + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + /** + * Reports the current offset in bytes from the start of the underlying buffer. + */ + simdjson_inline uint32_t current_offset() const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. + * + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for a given token. + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + + /** + * Return the current index. + */ + simdjson_inline token_position position() const noexcept; + /** + * Reset to a previously saved index. + */ + simdjson_inline void set_position(token_position target_position) noexcept; + + // NOTE: we don't support a full C++ iterator interface, because we expect people to make + // different calls to advance the iterator based on *their own* state. + + simdjson_inline bool operator==(const token_iterator &other) const noexcept; + simdjson_inline bool operator!=(const token_iterator &other) const noexcept; + simdjson_inline bool operator>(const token_iterator &other) const noexcept; + simdjson_inline bool operator>=(const token_iterator &other) const noexcept; + simdjson_inline bool operator<(const token_iterator &other) const noexcept; + simdjson_inline bool operator<=(const token_iterator &other) const noexcept; + +protected: + simdjson_inline token_iterator(const uint8_t *buf, token_position position) noexcept; + + /** + * Get the index of the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = current token, + * 1 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_index(int32_t delta=0) const noexcept; + /** + * Get the index of the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token. 
+ * + */ + simdjson_inline uint32_t peek_index(token_position position) const noexcept; + + const uint8_t *buf{}; + token_position _position{}; + + friend class json_iterator; + friend class value_iterator; + friend class object; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... args) noexcept; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::token_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_H +/* end file simdjson/generic/ondemand/token_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator.h for westmere: #include "simdjson/generic/ondemand/json_iterator.h" */ +/* begin file simdjson/generic/ondemand/json_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * Iterates through JSON tokens, keeping track of depth and string buffer. + * + * @private This is not intended for external use. + */ +class json_iterator { +protected: + token_iterator token{}; + ondemand::parser *parser{}; + /** + * Next free location in the string buffer. + * + * Used by raw_json_string::unescape() to have a place to unescape strings to. + */ + uint8_t *_string_buf_loc{}; + /** + * JSON error, if there is one. + * + * INCORRECT_TYPE and NO_SUCH_FIELD are *not* stored here, ever. + * + * PERF NOTE: we *hope* this will be elided into control flow, as it is only used (a) in the first + * iteration of the loop, or (b) for the final iteration after a missing comma is found in ++. If + * this is not elided, we should make sure it's at least not using up a register. Failing that, + * we should store it in document so there's only one of them. + */ + error_code error{SUCCESS}; + /** + * Depth of the current token in the JSON. + * + * - 0 = finished with document + * - 1 = document root value (could be [ or {, not yet known) + * - 2 = , or } inside root array/object + * - 3 = key or value inside root array/object. + */ + depth_t _depth{}; + /** + * Beginning of the document indexes. 
+ * Normally we have root == parser->implementation->structural_indexes.get() + * but this may differ, especially in streaming mode (where we have several + * documents); + */ + token_position _root{}; + /** + * Normally, a json_iterator operates over a single document, but in + * some cases, we may have a stream of documents. This attribute is meant + * as meta-data: the json_iterator works the same irrespective of the + * value of this attribute. + */ + bool _streaming{false}; + +public: + simdjson_inline json_iterator() noexcept = default; + simdjson_inline json_iterator(json_iterator &&other) noexcept; + simdjson_inline json_iterator &operator=(json_iterator &&other) noexcept; + simdjson_inline explicit json_iterator(const json_iterator &other) noexcept = default; + simdjson_inline json_iterator &operator=(const json_iterator &other) noexcept = default; + /** + * Skips a JSON value, whether it is a scalar, array or object. + */ + simdjson_warn_unused simdjson_inline error_code skip_child(depth_t parent_depth) noexcept; + + /** + * Tell whether the iterator is still at the start + */ + simdjson_inline bool at_root() const noexcept; + + /** + * Tell whether we should be expected to run in streaming + * mode (iterating over many documents). It is pure metadata + * that does not affect how the iterator works. It is used by + * start_root_array() and start_root_object(). + */ + simdjson_inline bool streaming() const noexcept; + + /** + * Get the root value iterator + */ + simdjson_inline token_position root_position() const noexcept; + /** + * Assert that we are at the document depth (== 1) + */ + simdjson_inline void assert_at_document_depth() const noexcept; + /** + * Assert that we are at the root of the document + */ + simdjson_inline void assert_at_root() const noexcept; + + /** + * Tell whether the iterator is at the EOF mark + */ + simdjson_inline bool at_end() const noexcept; + + /** + * Tell whether the iterator is live (has not been moved). + */ + simdjson_inline bool is_alive() const noexcept; + + /** + * Abandon this iterator, setting depth to 0 (as if the document is finished). + */ + simdjson_inline void abandon() noexcept; + + /** + * Advance the current token without modifying depth. + */ + simdjson_inline const uint8_t *return_current_and_advance() noexcept; + + /** + * Returns true if there is a single token in the index (i.e., it is + * a JSON with a scalar value such as a single number). + * + * @return whether there is a single token + */ + simdjson_inline bool is_single_token() const noexcept; + + /** + * Assert that there are at least the given number of tokens left. + * + * Has no effect in release builds. + */ + simdjson_inline void assert_more_tokens(uint32_t required_tokens=1) const noexcept; + /** + * Assert that the given position addresses an actual token (is within bounds). + * + * Has no effect in release builds. + */ + simdjson_inline void assert_valid_position(token_position position) const noexcept; + /** + * Get the JSON text for a given token (relative). + * + * This is not null-terminated; it is a view into the JSON. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(int32_t delta=0) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). 
+ * + * The length will include any whitespace at the end of the token. + * + * @param delta The relative position of the token to retrieve. e.g. 0 = next token, -1 = prev token. + */ + simdjson_inline uint32_t peek_length(int32_t delta=0) const noexcept; + /** + * Get a pointer to the current location in the input buffer. + * + * This is not null-terminated; it is a view into the JSON. + * + * You may be pointing outside of the input buffer: it is not generally + * safe to dereference this pointer. + */ + simdjson_inline const uint8_t *unsafe_pointer() const noexcept; + /** + * Get the JSON text for a given token. + * + * This is not null-terminated; it is a view into the JSON. + * + * @param position The position of the token to retrieve. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek(token_position position) const noexcept; + /** + * Get the maximum length of the JSON text for the current token (or relative). + * + * The length will include any whitespace at the end of the token. + * + * @param position The position of the token to retrieve. + */ + simdjson_inline uint32_t peek_length(token_position position) const noexcept; + /** + * Get the JSON text for the last token in the document. + * + * This is not null-terminated; it is a view into the JSON. + * + * TODO consider a string_view, assuming the length will get stripped out by the optimizer when + * it isn't used ... + */ + simdjson_inline const uint8_t *peek_last() const noexcept; + + /** + * Ascend one level. + * + * Validates that the depth - 1 == parent_depth. + * + * @param parent_depth the expected parent depth. + */ + simdjson_inline void ascend_to(depth_t parent_depth) noexcept; + + /** + * Descend one level. + * + * Validates that the new depth == child_depth. + * + * @param child_depth the expected child depth. + */ + simdjson_inline void descend_to(depth_t child_depth) noexcept; + simdjson_inline void descend_to(depth_t child_depth, int32_t delta) noexcept; + + /** + * Get current depth. + */ + simdjson_inline depth_t depth() const noexcept; + + /** + * Get current (writeable) location in the string buffer. + */ + simdjson_inline uint8_t *&string_buf_loc() noexcept; + + /** + * Report an unrecoverable error, preventing further iteration. + * + * @param error The error to report. Must not be SUCCESS, UNINITIALIZED, INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code report_error(error_code error, const char *message) noexcept; + + /** + * Log error, but don't stop iteration. + * @param error The error to report. Must be INCORRECT_TYPE, or NO_SUCH_FIELD. + * @param message An error message to report with the error. + */ + simdjson_inline error_code optional_error(error_code error, const char *message) noexcept; + + /** + * Take an input in json containing max_len characters and attempt to copy it over to tmpbuf, a buffer with + * N bytes of capacity. It will return false if N is too small (smaller than max_len) of if it is zero. + * The buffer (tmpbuf) is padded with space characters. + */ + simdjson_warn_unused simdjson_inline bool copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept; + + simdjson_inline token_position position() const noexcept; + /** + * Write the raw_json_string to the string buffer and return a string_view. 
+ * Each raw_json_string should be unescaped once, or else the string buffer might + * overflow. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, bool allow_replacement) noexcept; + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in) noexcept; + simdjson_inline void reenter_child(token_position position, depth_t child_depth) noexcept; + + simdjson_inline error_code consume_character(char c) noexcept; +#if SIMDJSON_DEVELOPMENT_CHECKS + simdjson_inline token_position start_position(depth_t depth) const noexcept; + simdjson_inline void set_start_position(depth_t depth, token_position position) noexcept; +#endif + + /* Useful for debugging and logging purposes. */ + inline std::string to_string() const noexcept; + + /** + * Returns the current location in the document if in bounds. + */ + inline simdjson_result current_location() const noexcept; + + /** + * Updates this json iterator so that it is back at the beginning of the document, + * as if it had just been created. + */ + inline void rewind() noexcept; + /** + * This checks whether the {,},[,] are balanced so that the document + * ends with proper zero depth. This requires scanning the whole document + * and it may be expensive. It is expected that it will be rarely called. + * It does not attempt to match { with } and [ with ]. + */ + inline bool balanced() const noexcept; +protected: + simdjson_inline json_iterator(const uint8_t *buf, ondemand::parser *parser) noexcept; + /// The last token before the end + simdjson_inline token_position last_position() const noexcept; + /// The token *at* the end. This points at gibberish and should only be used for comparison. + simdjson_inline token_position end_position() const noexcept; + /// The end of the buffer. + simdjson_inline token_position end() const noexcept; + + friend class document; + friend class document_stream; + friend class object; + friend class array; + friend class value; + friend class raw_json_string; + friend class parser; + friend class value_iterator; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, logger::log_level level, Args&&... args) noexcept; + template + friend simdjson_inline void logger::log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, logger::log_level level, Args&&... 
args) noexcept; +}; // json_iterator + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::json_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_H +/* end file simdjson/generic/ondemand/json_iterator.h for westmere */ +/* including simdjson/generic/ondemand/json_type.h for westmere: #include "simdjson/generic/ondemand/json_type.h" */ +/* begin file simdjson/generic/ondemand/json_type.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * The type of a JSON value. + */ +enum class json_type { + // Start at 1 to catch uninitialized / default values more easily + array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) + object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) + number, ///< A JSON number ( 1 or -2.3 or 4.5e6 ...) + string, ///< A JSON string ( "a" or "hello world\n" ...) + boolean, ///< A JSON boolean (true or false) + null ///< A JSON null (null) +}; + +/** + * A type representing a JSON number. + * The design of the struct is deliberately straight-forward. All + * functions return standard values with no error check. + */ +struct number { + + /** + * return the automatically determined type of + * the number: number_type::floating_point_number, + * number_type::signed_integer or number_type::unsigned_integer. + * + * enum class number_type { + * floating_point_number=1, /// a binary64 number + * signed_integer, /// a signed integer that fits in a 64-bit word using two's complement + * unsigned_integer /// a positive integer larger or equal to 1<<63 + * }; + */ + simdjson_inline ondemand::number_type get_number_type() const noexcept; + /** + * return true if the automatically determined type of + * the number is number_type::unsigned_integer. + */ + simdjson_inline bool is_uint64() const noexcept; + /** + * return the value as a uint64_t, only valid if is_uint64() is true. + */ + simdjson_inline uint64_t get_uint64() const noexcept; + simdjson_inline operator uint64_t() const noexcept; + + /** + * return true if the automatically determined type of + * the number is number_type::signed_integer. + */ + simdjson_inline bool is_int64() const noexcept; + /** + * return the value as a int64_t, only valid if is_int64() is true. + */ + simdjson_inline int64_t get_int64() const noexcept; + simdjson_inline operator int64_t() const noexcept; + + + /** + * return true if the automatically determined type of + * the number is number_type::floating_point_number. 
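+ *
+ * (Illustrative sketch, not part of the original header. It assumes a number
+ * obtained through the public ondemand API, e.g. via value::get_number(),
+ * with exceptions enabled for brevity.)
+ *
+ *   ondemand::number n = val.get_number();
+ *   if (n.is_double())     { double d   = n.get_double();  (void)d; }
+ *   else if (n.is_int64()) { int64_t i  = n.get_int64();   (void)i; }
+ *   else                   { uint64_t u = n.get_uint64();  (void)u; }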
+ */ + simdjson_inline bool is_double() const noexcept; + /** + * return the value as a double, only valid if is_double() is true. + */ + simdjson_inline double get_double() const noexcept; + simdjson_inline operator double() const noexcept; + + /** + * Convert the number to a double. Though it always succeed, the conversion + * may be lossy if the number cannot be represented exactly. + */ + simdjson_inline double as_double() const noexcept; + + +protected: + /** + * The next block of declaration is designed so that we can call the number parsing + * functions on a number type. They are protected and should never be used outside + * of the core simdjson library. + */ + friend class value_iterator; + template + friend error_code numberparsing::slow_float_parsing(simdjson_unused const uint8_t * src, W writer); + template + friend error_code numberparsing::write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer); + template + friend error_code numberparsing::parse_number(const uint8_t *const src, W &writer); + /** Store a signed 64-bit value to the number. */ + simdjson_inline void append_s64(int64_t value) noexcept; + /** Store an unsigned 64-bit value to the number. */ + simdjson_inline void append_u64(uint64_t value) noexcept; + /** Store a double value to the number. */ + simdjson_inline void append_double(double value) noexcept; + /** Specifies that the value is a double, but leave it undefined. */ + simdjson_inline void skip_double() noexcept; + /** + * End of friend declarations. + */ + + /** + * Our attributes are a union type (size = 64 bits) + * followed by a type indicator. + */ + union { + double floating_point_number; + int64_t signed_integer; + uint64_t unsigned_integer; + } payload{0}; + number_type type{number_type::signed_integer}; +}; + +/** + * Write the JSON type to the output stream + * + * @param out The output stream. + * @param type The json_type. + */ +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept; + +#if SIMDJSON_EXCEPTIONS +/** + * Send JSON type to an output stream. + * + * @param out The output stream. + * @param type The json_type. + * @throw simdjson_error if the result being printed has an error. If there is an error with the + * underlying output stream, that error will be propagated (simdjson_error will not be + * thrown). 
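+ *
+ * (Illustrative sketch, not part of the original header; it assumes a
+ * document exposing a type() accessor as in the public ondemand API, with
+ * exceptions enabled.)
+ *
+ *   ondemand::document doc = parser.iterate(json);
+ *   std::cout << doc.type() << std::endl;  // e.g. prints "object" for {...}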
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false); +#endif + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::json_type &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_H +/* end file simdjson/generic/ondemand/json_type.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string.h for westmere: #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A string escaped per JSON rules, terminated with quote ("). They are used to represent + * unescaped keys inside JSON documents. + * + * (In other words, a pointer to the beginning of a string, just after the start quote, inside a + * JSON file.) + * + * This class is deliberately simplistic and has little functionality. You can + * compare a raw_json_string instance with an unescaped C string, but + * that is nearly all you can do. + * + * The raw_json_string is unescaped. If you wish to write an unescaped version of it to your own + * buffer, you may do so using the parser.unescape(string, buff) method, using an ondemand::parser + * instance. Doing so requires you to have a sufficiently large buffer. + * + * The raw_json_string instances originate typically from field instance which in turn represent + * key-value pairs from object instances. From a field instance, you get the raw_json_string + * instance by calling key(). You can, if you want a more usable string_view instance, call + * the unescaped_key() method on the field instance. You may also create a raw_json_string from + * any other string value, with the value.get_raw_json_string() method. Again, you can get + * a more usable string_view instance by calling get_string(). + * + */ +class raw_json_string { +public: + /** + * Create a new invalid raw_json_string. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline raw_json_string() noexcept = default; + + /** + * Create a new invalid raw_json_string pointed at the given location in the JSON. + * + * The given location must be just *after* the beginning quote (") in the JSON file. + * + * It *must* be terminated by a ", and be a valid JSON string. + */ + simdjson_inline raw_json_string(const uint8_t * _buf) noexcept; + /** + * Get the raw pointer to the beginning of the string in the JSON (just after the "). 
+ * + * It is possible for this function to return a null pointer if the instance + * has outlived its existence. + */ + simdjson_inline const char * raw() const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done) on target.size() characters, + * and if the raw_json_string instance has a quote character at byte index target.size(). + * We never read more than length + 1 bytes in the raw_json_string instance. + * If length is smaller than target.size(), this will return false. + * + * The std::string_view instance may contain any characters. However, the caller + * is responsible for setting length so that length bytes may be read in the + * raw_json_string. + * + * Performance: the comparison may be done using memcmp which may be efficient + * for long strings. + */ + simdjson_inline bool unsafe_is_equal(size_t length, std::string_view target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The std::string_view instance should not contain unescaped quote characters: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * Performance: the comparison is done byte-by-byte which might be inefficient for + * long strings. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + * The provided C string should not contain an unescaped quote character: + * the caller is responsible for this check. See is_free_from_unescaped_quote. + * + * If target is a compile-time constant, and your compiler likes you, + * you should be able to do the following without performance penalty... + * + * static_assert(raw_json_string::is_free_from_unescaped_quote(target), ""); + * s.unsafe_is_equal(target); + */ + simdjson_inline bool unsafe_is_equal(const char* target) const noexcept; + + /** + * This compares the current instance to the std::string_view target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(std::string_view target) const noexcept; + + /** + * This compares the current instance to the C string target: returns true if + * they are byte-by-byte equal (no escaping is done). + */ + simdjson_inline bool is_equal(const char* target) const noexcept; + + /** + * Returns true if target is free from unescaped quote. If target is known at + * compile-time, we might expect the computation to happen at compile time with + * many compilers (not all!). + */ + static simdjson_inline bool is_free_from_unescaped_quote(std::string_view target) noexcept; + static simdjson_inline bool is_free_from_unescaped_quote(const char* target) noexcept; + +private: + + + /** + * This will set the inner pointer to zero, effectively making + * this instance unusable. + */ + simdjson_inline void consume() noexcept { buf = nullptr; } + + /** + * Checks whether the inner pointer is non-null and thus usable. 
+ */ + simdjson_inline simdjson_warn_unused bool alive() const noexcept { return buf != nullptr; } + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result will be a valid UTF-8. + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + * @param allow_replacement Whether we allow replacement of invalid surrogate pairs. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape(json_iterator &iter, bool allow_replacement) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. + * The result may not be a valid UTF-8. https://simonsapin.github.io/wtf-8/ + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid until the next parse() call on the parser. + * + * @param iter A json_iterator, which contains a buffer where the string will be written. + */ + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(json_iterator &iter) const noexcept; + const uint8_t * buf{}; + friend class object; + friend class field; + friend class parser; + friend struct simdjson_result; +}; + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &, const raw_json_string &) noexcept; + +/** + * Comparisons between raw_json_string and std::string_view instances are potentially unsafe: the user is responsible + * for providing a string with no unescaped quote. Note that unescaped quotes cannot be present in valid JSON strings. + */ +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept; +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept; +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept; + + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline ~simdjson_result() noexcept = default; ///< @private + + simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; + simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H +/* end file simdjson/generic/ondemand/raw_json_string.h for westmere */ +/* including simdjson/generic/ondemand/parser.h for westmere: #include "simdjson/generic/ondemand/parser.h" */ +/* begin file simdjson/generic/ondemand/parser.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* 
amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * The default batch size for document_stream instances for this On Demand kernel. + * Note that different On Demand kernel may use a different DEFAULT_BATCH_SIZE value + * in the future. + */ +static constexpr size_t DEFAULT_BATCH_SIZE = 1000000; +/** + * Some adversary might try to set the batch size to 0 or 1, which might cause problems. + * We set a minimum of 32B since anything else is highly likely to be an error. In practice, + * most users will want a much larger batch size. + * + * All non-negative MINIMAL_BATCH_SIZE values should be 'safe' except that, obviously, no JSON + * document can ever span 0 or 1 byte and that very large values would create memory allocation issues. + */ +static constexpr size_t MINIMAL_BATCH_SIZE = 32; + +/** + * A JSON fragment iterator. + * + * This holds the actual iterator as well as the buffer for writing strings. + */ +class parser { +public: + /** + * Create a JSON parser. + * + * The new parser will have zero capacity. + */ + inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept; + + inline parser(parser &&other) noexcept = default; + simdjson_inline parser(const parser &other) = delete; + simdjson_inline parser &operator=(const parser &other) = delete; + simdjson_inline parser &operator=(parser &&other) noexcept = default; + + /** Deallocate the JSON parser. */ + inline ~parser() noexcept = default; + + /** + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * document doc = parser.iterate(json); + * + * It is expected that the content is a valid UTF-8 file, containing a valid JSON document. + * Otherwise the iterate method may return an error. In particular, the whole input should be + * valid: we do not attempt to tolerate incorrect content either before or after a JSON + * document. + * + * ### IMPORTANT: Validate what you use + * + * Calling iterate on an invalid JSON document may not immediately trigger an error. The call to + * iterate does not parse and validate the whole document. + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * @param len The length of the JSON. + * @param capacity The number of bytes allocated in the JSON (must be at least len+SIMDJSON_PADDING). + * + * @return The document, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. 
+ * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. + */ + simdjson_warn_unused simdjson_result iterate(padded_string_view json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const char *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const uint8_t *json, size_t len, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; + /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + simdjson_warn_unused simdjson_result iterate(padded_string &&json) & noexcept = delete; + + /** + * @private + * + * Start iterating an on-demand JSON document. + * + * ondemand::parser parser; + * json_iterator doc = parser.iterate(json); + * + * ### IMPORTANT: Buffer Lifetime + * + * Because parsing is done while you iterate, you *must* keep the JSON buffer around at least as + * long as the document iteration. + * + * ### IMPORTANT: Document Lifetime + * + * Only one iteration at a time can happen per parser, and the parser *must* be kept alive during + * iteration to ensure intermediate buffers can be accessed. Any document must be destroyed before + * you call parse() again or destroy the parser. + * + * The ondemand::document instance holds the iterator. The document must remain in scope + * while you are accessing instances of ondemand::value, ondemand::object, ondemand::array. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * @param json The JSON to parse. + * + * @return The iterator, or an error: + * - INSUFFICIENT_PADDING if the input has less than SIMDJSON_PADDING extra bytes. + * - MEMALLOC if realloc_if_needed the parser does not have enough capacity, and memory + * allocation fails. + * - EMPTY if the document is all whitespace. + * - UTF8_ERROR if the document is not valid UTF-8. + * - UNESCAPED_CHARS if a string contains control characters that must be escaped + * - UNCLOSED_STRING if there is an unclosed string in the document. 
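+ *
+ * (Illustrative sketch, not part of the original header: the padding
+ * requirement above is typically satisfied by the padded_string helper or
+ * its _padded literal, as in the iterate() examples earlier in this file.)
+ *
+ *   auto json = R"( { "key": "value" } )"_padded;   // padded_string literal
+ *   ondemand::parser parser;
+ *   ondemand::document doc = parser.iterate(json);  // with exceptions enabled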
+ */ + simdjson_warn_unused simdjson_result iterate_raw(padded_string_view json) & noexcept; + + + /** + * Parse a buffer containing many JSON documents. + * + * auto json = R"({ "foo": 1 } { "foo": 2 } { "foo": 3 } )"_padded; + * ondemand::parser parser; + * ondemand::document_stream docs = parser.iterate_many(json); + * for (auto & doc : docs) { + * std::cout << doc["foo"] << std::endl; + * } + * // Prints 1 2 3 + * + * No copy of the input buffer is made. + * + * The function is lazy: it may be that no more than one JSON document at a time is parsed. + * + * The caller is responsabile to ensure that the input string data remains unchanged and is + * not deleted during the loop. + * + * ### Format + * + * The buffer must contain a series of one or more JSON documents, concatenated into a single + * buffer, separated by ASCII whitespace. It effectively parses until it has a fully valid document, + * then starts parsing the next document at that point. (It does this with more parallelism and + * lookahead than you might think, though.) + * + * documents that consist of an object or array may omit the whitespace between them, concatenating + * with no separator. Documents that consist of a single primitive (i.e. documents that are not + * arrays or objects) MUST be separated with ASCII whitespace. + * + * The characters inside a JSON document, and between JSON documents, must be valid Unicode (UTF-8). + * + * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. + * Setting batch_size to excessively large or excessively small values may impact negatively the + * performance. + * + * ### REQUIRED: Buffer Padding + * + * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what + * those bytes are initialized to, as long as they are allocated. These bytes will be read: if you + * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the + * SIMDJSON_PADDING bytes to avoid runtime warnings. + * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * + * ### Parser Capacity + * + * If the parser's current capacity is less than batch_size, it will allocate enough capacity + * to handle it (up to max_capacity). + * + * @param buf The concatenated JSON to parse. + * @param len The length of the concatenated JSON. + * @param batch_size The batch size to use. MUST be larger than the largest document. The sweet + * spot is cache-related: small enough to fit in cache, yet big enough to + * parse as many documents as possible in one tight loop. + * Defaults to 10MB, which has been a reasonable sweet spot in our tests. + * @param allow_comma_separated (defaults on false) This allows a mode where the documents are + * separated by commas instead of whitespace. It comes with a performance + * penalty because the entire document is indexed at once (and the document must be + * less than 4 GB), and there is no multithreading. In this mode, the batch_size parameter + * is effectively ignored, as it is set to at least the document size. + * @return The stream, or an error. An empty input will yield 0 documents rather than an EMPTY error. Errors: + * - MEMALLOC if the parser does not have enough capacity and memory allocation fails + * - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity. + * - other json errors if parsing fails. 
You should not rely on these errors to always the same for the + * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). + */ + inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + + /** @private We do not want to allow implicit conversion from C string to std::string. */ + simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; + + /** The capacity of this parser (the largest document it can process). */ + simdjson_inline size_t capacity() const noexcept; + /** The maximum capacity of this parser (the largest document it is allowed to process). */ + simdjson_inline size_t max_capacity() const noexcept; + simdjson_inline void set_max_capacity(size_t max_capacity) noexcept; + /** + * The maximum depth of this parser (the most deeply nested objects and arrays it can process). + * This parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + */ + simdjson_inline size_t max_depth() const noexcept; + + /** + * Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length + * and `max_depth` depth. + * + * The max_depth parameter is only relevant when the macro SIMDJSON_DEVELOPMENT_CHECKS is set to true. + * The document's instance current_depth() method should be used to monitor the parsing + * depth and limit it if desired. + * + * @param capacity The new capacity. + * @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH. + * @return The error, if there is one. + */ + simdjson_warn_unused error_code allocate(size_t capacity, size_t max_depth=DEFAULT_MAX_DEPTH) noexcept; + + #ifdef SIMDJSON_THREADS_ENABLED + /** + * The parser instance can use threads when they are available to speed up some + * operations. It is enabled by default. Changing this attribute will change the + * behavior of the parser for future operations. + */ + bool threaded{true}; + #endif + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result must be valid UTF-8. + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. 
You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @param allow_replacement Whether we allow a replacement if the input string contains unmatched surrogate pairs. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. + */ + simdjson_inline simdjson_result unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement = false) const noexcept; + + /** + * Unescape this JSON string, replacing \\ with \, \n with newline, etc. to a user-provided buffer. + * The result may not be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * The provided pointer is advanced to the end of the string by reference, and a string_view instance + * is returned. You can ensure that your buffer is large enough by allocating a block of memory at least + * as large as the input JSON plus SIMDJSON_PADDING and then unescape all strings to this one buffer. + * + * This unescape function is a low-level function. If you want a more user-friendly approach, you should + * avoid raw_json_string instances (e.g., by calling unescaped_key() instead of key() or get_string() + * instead of get_raw_json_string()). + * + * ## IMPORTANT: string_view lifetime + * + * The string_view is only valid as long as the bytes in dst. + * + * @param raw_json_string input + * @param dst A pointer to a buffer at least large enough to write this string as well as + * an additional SIMDJSON_PADDING bytes. + * @return A string_view pointing at the unescaped string in dst + * @error STRING_ERROR if escapes are incorrect. 
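+ *
+ * (Illustrative sketch, not part of the original header. It assumes a
+ * raw_json_string `rjs` obtained elsewhere, e.g. from a field's key(), and a
+ * destination buffer sized as described above; the names are hypothetical.)
+ *
+ *   std::unique_ptr<uint8_t[]> buffer(new uint8_t[json_length + SIMDJSON_PADDING]);
+ *   uint8_t *dst = buffer.get();
+ *   std::string_view unescaped;
+ *   auto error = parser.unescape(rjs, dst).get(unescaped); // dst advances past the string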
+ */ + simdjson_inline simdjson_result unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept; + +private: + /** @private [for benchmarking access] The implementation to use */ + std::unique_ptr implementation{}; + size_t _capacity{0}; + size_t _max_capacity; + size_t _max_depth{DEFAULT_MAX_DEPTH}; + std::unique_ptr string_buf{}; +#if SIMDJSON_DEVELOPMENT_CHECKS + std::unique_ptr start_positions{}; +#endif + + friend class json_iterator; + friend class document_stream; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::parser &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_H +/* end file simdjson/generic/ondemand/parser.h for westmere */ + +// All other declarations +/* including simdjson/generic/ondemand/array.h for westmere: #include "simdjson/generic/ondemand/array.h" */ +/* begin file simdjson/generic/ondemand/array.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON array. + */ +class array { +public: + /** + * Create a new invalid array. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline array() noexcept = default; + + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() noexcept; + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an array is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the beginning of the array and checks whether the + * array is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. 
If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result is_empty() & noexcept; + /** + * Reset the iterator so that we are pointing back at the + * beginning of the array. You should still consume values only once even if you + * can iterate through the array more than once. If you unescape a string + * within the array more than once, you have unsafe code. Note that rewinding + * an array means that you may need to reparse it anew: it is not a free + * operation. + * + * @returns true if the array contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/0/foo/a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an array + * instance: there is no rewind and no invalidation. + * + * You may only call at_pointer on an array after it has been created, but before it has + * been first accessed. When calling at_pointer on an array, the pointer is advanced to + * the location indicated by the JSON pointer (in case of success). It is no longer possible + * to call at_pointer on the same array. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the array and returns a string_view instance corresponding to the + * array as represented in JSON. It points inside the original document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + + /** + * Get the value at the given index. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. + * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) noexcept; +protected: + /** + * Go to the end of the array, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + + /** + * Begin array iteration. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. + * @error INCORRECT_TYPE if the iterator is not at [. + */ + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + /** + * Begin array iteration from the root. + * + * @param iter The iterator. Must be where the initial [ is expected. Will be *moved* into the + * resulting array. 
+ * @error INCORRECT_TYPE if the iterator is not at [. + * @error TAPE_ERROR if there is no closing ] at the end of the document. + */ + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + /** + * Begin array iteration. + * + * This version of the method should be called after the initial [ has been verified, and is + * intended for use by switch statements that check the type of a value. + * + * @param iter The iterator. Must be after the initial [. Will be *moved* into the resulting array. + */ + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + + /** + * Create an array at the given Internal array creation. Call array::start() or array::started() instead of this. + * + * @param iter The iterator. Must either be at the start of the first element with iter.is_alive() + * == true, or past the [] with is_alive() == false if the array is empty. Will be *moved* + * into the resulting array. + */ + simdjson_inline array(const value_iterator &iter) noexcept; + + /** + * Iterator marking current position. + * + * iter.is_alive() == false indicates iteration is complete. + */ + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; + friend struct simdjson_result; + friend class array_iterator; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::array &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + inline simdjson_result count_elements() & noexcept; + inline simdjson_result is_empty() & noexcept; + inline simdjson_result reset() & noexcept; + simdjson_inline simdjson_result at(size_t index) noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_H +/* end file simdjson/generic/ondemand/array.h for westmere */ +/* including simdjson/generic/ondemand/array_iterator.h for westmere: #include "simdjson/generic/ondemand/array_iterator.h" */ +/* begin file simdjson/generic/ondemand/array_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON array. + * + * This is an input_iterator, meaning: + * - It is forward-only + * - * must be called exactly once per element. + * - ++ must be called exactly once in between each * (*, ++, *, ++, * ...) + */ +class array_iterator { +public: + /** Create a new, invalid array iterator. 
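+ *
+ * (Illustrative sketch, not part of the original header: in normal use this
+ * iterator is produced by array::begin()/end() and driven by a range-for
+ * loop, which respects the one-dereference, one-increment rule described
+ * above. Exceptions enabled for brevity.)
+ *
+ *   ondemand::array arr = doc.get_array();
+ *   for (auto element : arr) {
+ *     int64_t i = element.get_int64();   // consume each element exactly once
+ *     (void)i;
+ *   }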
*/ + simdjson_inline array_iterator() noexcept = default; + + // + // Iterator interface + // + + /** + * Get the current element. + * + * Part of the std::iterator interface. + */ + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + /** + * Check if we are at the end of the JSON. + * + * Part of the std::iterator interface. + * + * @return true if there are no more elements in the JSON array. + */ + simdjson_inline bool operator==(const array_iterator &) const noexcept; + /** + * Check if there are more elements in the JSON array. + * + * Part of the std::iterator interface. + * + * @return true if there are more elements in the JSON array. + */ + simdjson_inline bool operator!=(const array_iterator &) const noexcept; + /** + * Move to the next element. + * + * Part of the std::iterator interface. + */ + simdjson_inline array_iterator &operator++() noexcept; + +private: + value_iterator iter{}; + + simdjson_inline array_iterator(const value_iterator &iter) noexcept; + + friend class array; + friend class value; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_H +/* end file simdjson/generic/ondemand/array_iterator.h for westmere */ +/* including simdjson/generic/ondemand/document.h for westmere: #include "simdjson/generic/ondemand/document.h" */ +/* begin file simdjson/generic/ondemand/document.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A JSON document. It holds a json_iterator instance. + * + * Used by tokens to get text, and string buffer location. + * + * You must keep the document around during iteration. + */ +class document { +public: + /** + * Create a new invalid document. + * + * Exists so you can declare a variable and later assign to it before use. 
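+ *
+ * (Illustrative sketch, not part of the original header, using the
+ * error-code style of the iterate() API documented earlier:)
+ *
+ *   ondemand::document doc;                        // declared up front
+ *   auto error = parser.iterate(json).get(doc);    // assigned before use
+ *   if (error) { return; }                         // handle the error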
+ */ + simdjson_inline document() noexcept = default; + simdjson_inline document(const document &other) noexcept = delete; // pass your documents by reference, not by copy + simdjson_inline document(document &&other) noexcept = default; + simdjson_inline document &operator=(const document &other) noexcept = delete; + simdjson_inline document &operator=(document &&other) noexcept = default; + + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @returns INCORRECT_TYPE If the JSON value is not an array. + */ + simdjson_inline simdjson_result get_array() & noexcept; + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @returns INCORRECT_TYPE If the JSON value is not an object. + */ + simdjson_inline simdjson_result get_object() & noexcept; + /** + * Cast this JSON value to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64() noexcept; + /** + * Cast this JSON value (inside string) to an unsigned integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64() noexcept; + /** + * Cast this JSON value (inside string) to a signed integer. + * + * @returns A signed 64-bit integer. + * @returns INCORRECT_TYPE If the JSON value is not a 64-bit integer. + */ + simdjson_inline simdjson_result get_int64_in_string() noexcept; + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double() noexcept; + + /** + * Cast this JSON value (inside string) to a double. + * + * @returns A double. + * @returns INCORRECT_TYPE If the JSON value is not a valid floating-point number. + */ + simdjson_inline simdjson_result get_double_in_string() noexcept; + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * Important: Calling get_string() twice on the same document is an error. + * + * @param Whether to allow a replacement character for unmatched surrogate pairs. + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + /** + * Cast this JSON value to a string. + * + * The string is not guaranteed to be valid UTF-8. See https://simonsapin.github.io/wtf-8/ + * + * Important: Calling get_wobbly_string() twice on the same document is an error. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_wobbly_string() noexcept; + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. 
\\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @returns INCORRECT_TYPE if the JSON value is not a string. + */ + simdjson_inline simdjson_result get_raw_json_string() noexcept; + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @returns INCORRECT_TYPE if the JSON value is not true or false. + */ + simdjson_inline simdjson_result get_bool() noexcept; + /** + * Cast this JSON value to a value when the document is an object or an array. + * + * @returns A value if a JSON array or object cannot be found. + * @returns SCALAR_DOCUMENT_AS_VALUE error is the document is a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result get_value() noexcept; + + /** + * Checks if this JSON value is null. If and only if the value is + * null, then it is consumed (we advance). If we find a token that + * begins with 'n' but is not 'null', then an error is returned. + * + * @returns Whether the value is null. + * @returns INCORRECT_TYPE If the JSON value begins with 'n' and is not 'null'. + */ + simdjson_inline simdjson_result is_null() noexcept; + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * + * You may use get_double(), get_bool(), get_uint64(), get_int64(), + * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template simdjson_inline simdjson_result get() & noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + /** @overload template simdjson_result get() & noexcept */ + template simdjson_inline simdjson_result get() && noexcept { + // Unless the simdjson library provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library."); + } + + /** + * Get this value as the given type. + * + * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool, value + * + * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template simdjson_inline error_code get(T &out) & noexcept; + /** @overload template error_code get(T &out) & noexcept */ + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + /** + * Cast this JSON value to an array. + * + * @returns An object that can be used to iterate the array. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an array. + */ + simdjson_inline operator array() & noexcept(false); + /** + * Cast this JSON value to an object. + * + * @returns An object that can be used to look up or iterate fields. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not an object. + */ + simdjson_inline operator object() & noexcept(false); + /** + * Cast this JSON value to an unsigned integer. 
+ * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit unsigned integer. + */ + simdjson_inline operator uint64_t() noexcept(false); + /** + * Cast this JSON value to a signed integer. + * + * @returns A signed 64-bit integer. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a 64-bit integer. + */ + simdjson_inline operator int64_t() noexcept(false); + /** + * Cast this JSON value to a double. + * + * @returns A double. + * @exception simdjson_error(INCORRECT_TYPE) If the JSON value is not a valid floating-point number. + */ + simdjson_inline operator double() noexcept(false); + /** + * Cast this JSON value to a string. + * + * The string is guaranteed to be valid UTF-8. + * + * @returns An UTF-8 string. The string is stored in the parser and will be invalidated the next + * time it parses a document or when it is destroyed. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator std::string_view() noexcept(false); + /** + * Cast this JSON value to a raw_json_string. + * + * The string is guaranteed to be valid UTF-8, and may have escapes in it (e.g. \\ or \n). + * + * @returns A pointer to the raw JSON for the given string. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. + */ + simdjson_inline operator raw_json_string() noexcept(false); + /** + * Cast this JSON value to a bool. + * + * @returns A bool value. + * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not true or false. + */ + simdjson_inline operator bool() noexcept(false); + /** + * Cast this JSON value to a value. + * + * @returns A value value. + * @exception if a JSON value cannot be found + */ + simdjson_inline operator value() noexcept(false); +#endif + /** + * This method scans the array and counts the number of elements. + * The count_elements method should always be called before you have begun + * iterating through the array: it is expected that you are pointing at + * the beginning of the array. + * The runtime complexity is linear in the size of the array. After + * calling this function, if successful, the array is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + simdjson_inline simdjson_result count_elements() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. + * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Get the value at the given index in the array. This function has linear-time complexity. + * This function should only be called once on an array instance since the array iterator is not reset between each call. 
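+ *
+ * Illustrative sketch (assumes exceptions are enabled and `parser` is an ondemand::parser):
+ *
+ * ```c++
+ * auto json = R"([10, 20, 30])"_padded;
+ * ondemand::document doc = parser.iterate(json);
+ * int64_t second = doc.at(1);   // 20; call at() at most once per array
+ * ```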
+ * + * @return The value at the given index, or: + * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length + */ + simdjson_inline simdjson_result at(size_t index) & noexcept; + /** + * Begin array iteration. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result begin() & noexcept; + /** + * Sentinel representing the end of the array. + * + * Part of the std::iterable interface. + */ + simdjson_inline simdjson_result end() & noexcept; + + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to + * a key a single time. Doing object["mykey"].to_string()and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. E.g., the array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). 
You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + + /** + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * NOTE: If you're only expecting a value to be one type (a typical case), it's generally + * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just + * let it throw an exception). + * + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result type() noexcept; + + /** + * Checks whether the document is a scalar (string, number, null, Boolean). + * Returns false when there it is an array or object. + * + * @returns true if the type is string, number, null, Boolean + * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + */ + simdjson_inline simdjson_result is_scalar() noexcept; + + /** + * Checks whether the document is a negative number. + * + * @returns true if the number if negative. + */ + simdjson_inline bool is_negative() noexcept; + /** + * Checks whether the document is an integer number. Note that + * this requires to partially parse the number string. If + * the value is determined to be an integer, it may still + * not parse properly as an integer in subsequent steps + * (e.g., it might overflow). + * + * @returns true if the number if negative. + */ + simdjson_inline simdjson_result is_integer() noexcept; + /** + * Determine the number type (integer or floating-point number) as quickly + * as possible. This function does not fully validate the input. It is + * useful when you only need to classify the numbers, without parsing them. + * + * If you are planning to retrieve the value or you need full validation, + * consider using the get_number() method instead: it will fully parse + * and validate the input, and give you access to the type: + * get_number().get_number_type(). 
+ * + * get_number_type() is number_type::unsigned_integer if we have + * an integer greater or equal to 9223372036854775808 + * get_number_type() is number_type::signed_integer if we have an + * integer that is less than 9223372036854775808 + * Otherwise, get_number_type() has value number_type::floating_point_number + * + * This function requires processing the number string, but it is expected + * to be faster than get_number().get_number_type() because it is does not + * parse the number value. + * + * @returns the type of the number + */ + simdjson_inline simdjson_result get_number_type() noexcept; + + /** + * Attempt to parse an ondemand::number. An ondemand::number may + * contain an integer value or a floating-point value, the simdjson + * library will autodetect the type. Thus it is a dynamically typed + * number. Before accessing the value, you must determine the detected + * type. + * + * number.get_number_type() is number_type::signed_integer if we have + * an integer in [-9223372036854775808,9223372036854775808) + * You can recover the value by calling number.get_int64() and you + * have that number.is_int64() is true. + * + * number.get_number_type() is number_type::unsigned_integer if we have + * an integer in [9223372036854775808,18446744073709551616) + * You can recover the value by calling number.get_uint64() and you + * have that number.is_uint64() is true. + * + * Otherwise, number.get_number_type() has value number_type::floating_point_number + * and we have a binary64 number. + * You can recover the value by calling number.get_double() and you + * have that number.is_double() is true. + * + * You must check the type before accessing the value: it is an error + * to call "get_int64()" when number.get_number_type() is not + * number_type::signed_integer and when number.is_int64() is false. + */ + simdjson_warn_unused simdjson_inline simdjson_result get_number() noexcept; + + /** + * Get the raw JSON for this token. + * + * The string_view will always point into the input buffer. + * + * The string_view will start at the beginning of the token, and include the entire token + * *as well as all spaces until the next token (or EOF).* This means, for example, that a + * string token always begins with a " and is always terminated by the final ", possibly + * followed by a number of spaces. + * + * The string_view is *not* null-terminated. If this is a scalar (string, number, + * boolean, or null), the character after the end of the string_view may be the padded buffer. + * + * Tokens include: + * - { + * - [ + * - "a string (possibly with UTF-8 or backslashed characters like \\\")". + * - -1.2e-100 + * - true + * - false + * - null + */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + /** + * Reset the iterator inside the document instance so we are pointing back at the + * beginning of the document, as if it had just been created. It invalidates all + * values, objects and arrays that you have created so far (including unescaped strings). + */ + inline void rewind() noexcept; + /** + * Returns debugging information. + */ + inline std::string to_debug_string() noexcept; + /** + * Some unrecoverable error conditions may render the document instance unusable. + * The is_alive() method returns true when the document is still suitable. + */ + inline bool is_alive() noexcept; + + /** + * Returns the current location in the document if in bounds. 
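+ *
+ * Illustrative sketch (assumes `doc` is a live ondemand::document):
+ *
+ * ```c++
+ * const char *where = nullptr;
+ * if (doc.current_location().get(where) == simdjson::SUCCESS) {
+ *   // `where` points into the padded input buffer at the parser's current position
+ * }
+ * ```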
+ */ + inline simdjson_result current_location() const noexcept; + + /** + * Returns true if this document has been fully parsed. + * If you have consumed the whole document and at_end() returns + * false, then there may be trailing content. + */ + inline bool at_end() const noexcept; + + /** + * Returns the current depth in the document if in bounds. + * + * E.g., + * 0 = finished with document + * 1 = document root value (could be [ or {, not yet known) + * 2 = , or } inside root array/object + * 3 = key or value inside root array/object. + */ + simdjson_inline int32_t current_depth() const noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() automatically calls rewind between each call. Thus + * all values, objects and arrays that you have created so far (including unescaped strings) + * are invalidated. After calling at_pointer, you need to consume the result: string values + * should be stored in your own variables, arrays should be decoded and stored in your own array-like + * structures and so forth. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + * - SCALAR_DOCUMENT_AS_VALUE if the json_pointer is empty and the document is not a scalar (see is_scalar() function). + */ + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + /** + * Consumes the document and returns a string_view instance corresponding to the + * document as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; +protected: + /** + * Consumes the document. + */ + simdjson_inline error_code consume() noexcept; + + simdjson_inline document(ondemand::json_iterator &&iter) noexcept; + simdjson_inline const uint8_t *text(uint32_t idx) const noexcept; + + simdjson_inline value_iterator resume_value_iterator() noexcept; + simdjson_inline value_iterator get_root_value_iterator() noexcept; + simdjson_inline simdjson_result start_or_resume_object() noexcept; + static simdjson_inline document start(ondemand::json_iterator &&iter) noexcept; + + // + // Fields + // + json_iterator iter{}; ///< Current position in the document + static constexpr depth_t DOCUMENT_DEPTH = 0; ///< document depth is always 0 + + friend class array_iterator; + friend class value; + friend class ondemand::parser; + friend class object; + friend class array; + friend class field; + friend class token; + friend class document_stream; + friend class document_reference; +}; + + +/** + * A document_reference is a thin wrapper around a document reference instance. 
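+ *
+ * Typically obtained by iterating a document_stream. A minimal sketch
+ * (illustrative only; assumes exceptions are enabled):
+ *
+ * ```c++
+ * ondemand::parser parser;
+ * auto json = R"( {"k":1} {"k":2} {"k":3} )"_padded;
+ * ondemand::document_stream stream = parser.iterate_many(json);
+ * for (auto doc : stream) {     // each element refers to one document in the stream
+ *   int64_t k = doc["k"];       // consume the value before moving on
+ * }
+ * ```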
+ */ +class document_reference { +public: + simdjson_inline document_reference() noexcept; + simdjson_inline document_reference(document &d) noexcept; + simdjson_inline document_reference(const document_reference &other) noexcept = default; + simdjson_inline document_reference& operator=(const document_reference &other) noexcept = default; + simdjson_inline void rewind() noexcept; + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + + simdjson_inline simdjson_result is_null() noexcept; + simdjson_inline simdjson_result raw_json() noexcept; + simdjson_inline operator document&() const noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator array() & noexcept(false); + simdjson_inline operator object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + simdjson_inline simdjson_result raw_json_token() noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +private: + document *doc{nullptr}; +}; +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline 
simdjson_result(westmere::ondemand::document &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + + template simdjson_inline simdjson_result get() & noexcept; + template simdjson_inline simdjson_result get() && noexcept; + + template simdjson_inline error_code get(T &out) & noexcept; + template simdjson_inline error_code get(T &out) && noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline int32_t current_depth() const noexcept; + simdjson_inline bool at_end() const noexcept; + simdjson_inline bool is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + + + 
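+
+/*
+ * Minimal end-to-end sketch of the document API declared above (illustrative only;
+ * assumes exceptions are enabled and uses the public simdjson::ondemand alias rather
+ * than this implementation-specific namespace):
+ *
+ * ```c++
+ * #include "simdjson.h"
+ * using namespace simdjson;
+ *
+ * int main() {
+ *   ondemand::parser parser;
+ *   padded_string json = R"({ "x": 1, "y": 2.5, "name": "demo" })"_padded;
+ *   ondemand::document doc = parser.iterate(json);   // doc must outlive the values below
+ *   int64_t x = doc["x"];
+ *   double y = doc["y"];
+ *   std::string_view name = doc["name"];
+ *   return (x == 1 && y == 2.5 && !name.empty()) ? 0 : 1;
+ * }
+ * ```
+ */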
+namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::document_reference value, error_code error) noexcept; + simdjson_inline simdjson_result() noexcept = default; + simdjson_inline error_code rewind() noexcept; + + simdjson_inline simdjson_result get_array() & noexcept; + simdjson_inline simdjson_result get_object() & noexcept; + simdjson_inline simdjson_result get_uint64() noexcept; + simdjson_inline simdjson_result get_uint64_in_string() noexcept; + simdjson_inline simdjson_result get_int64() noexcept; + simdjson_inline simdjson_result get_int64_in_string() noexcept; + simdjson_inline simdjson_result get_double() noexcept; + simdjson_inline simdjson_result get_double_in_string() noexcept; + simdjson_inline simdjson_result get_string(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result get_wobbly_string() noexcept; + simdjson_inline simdjson_result get_raw_json_string() noexcept; + simdjson_inline simdjson_result get_bool() noexcept; + simdjson_inline simdjson_result get_value() noexcept; + simdjson_inline simdjson_result is_null() noexcept; + +#if SIMDJSON_EXCEPTIONS + simdjson_inline operator westmere::ondemand::array() & noexcept(false); + simdjson_inline operator westmere::ondemand::object() & noexcept(false); + simdjson_inline operator uint64_t() noexcept(false); + simdjson_inline operator int64_t() noexcept(false); + simdjson_inline operator double() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator westmere::ondemand::raw_json_string() noexcept(false); + simdjson_inline operator bool() noexcept(false); + simdjson_inline operator westmere::ondemand::value() noexcept(false); +#endif + simdjson_inline simdjson_result count_elements() & noexcept; + simdjson_inline simdjson_result count_fields() & noexcept; + simdjson_inline simdjson_result at(size_t index) & noexcept; + simdjson_inline simdjson_result begin() & noexcept; + simdjson_inline simdjson_result end() & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(const char *key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](const char *key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(const char *key) & noexcept; + simdjson_inline simdjson_result type() noexcept; + simdjson_inline simdjson_result is_scalar() noexcept; + simdjson_inline simdjson_result current_location() noexcept; + simdjson_inline simdjson_result current_depth() const noexcept; + simdjson_inline simdjson_result is_negative() noexcept; + simdjson_inline simdjson_result is_integer() noexcept; + simdjson_inline simdjson_result get_number_type() noexcept; + simdjson_inline simdjson_result get_number() noexcept; + /** @copydoc simdjson_inline std::string_view document_reference::raw_json_token() const noexcept */ + simdjson_inline simdjson_result raw_json_token() noexcept; + + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; +}; + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_H +/* end file simdjson/generic/ondemand/document.h for westmere */ +/* including simdjson/generic/ondemand/document_stream.h for westmere: #include 
"simdjson/generic/ondemand/document_stream.h" */ +/* begin file simdjson/generic/ondemand/document_stream.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#ifdef SIMDJSON_THREADS_ENABLED +#include +#include +#include +#endif + +namespace simdjson { +namespace westmere { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED +/** @private Custom worker class **/ +struct stage1_worker { + stage1_worker() noexcept = default; + stage1_worker(const stage1_worker&) = delete; + stage1_worker(stage1_worker&&) = delete; + stage1_worker operator=(const stage1_worker&) = delete; + ~stage1_worker(); + /** + * We only start the thread when it is needed, not at object construction, this may throw. + * You should only call this once. + **/ + void start_thread(); + /** + * Start a stage 1 job. You should first call 'run', then 'finish'. + * You must call start_thread once before. + */ + void run(document_stream * ds, parser * stage1, size_t next_batch_start); + /** Wait for the run to finish (blocking). You should first call 'run', then 'finish'. **/ + void finish(); + +private: + + /** + * Normally, we would never stop the thread. But we do in the destructor. + * This function is only safe assuming that you are not waiting for results. You + * should have called run, then finish, and be done. + **/ + void stop_thread(); + + std::thread thread{}; + /** These three variables define the work done by the thread. **/ + ondemand::parser * stage1_thread_parser{}; + size_t _next_batch_start{}; + document_stream * owner{}; + /** + * We have two state variables. This could be streamlined to one variable in the future but + * we use two for clarity. + */ + bool has_work{false}; + bool can_work{true}; + + /** + * We lock using a mutex. + */ + std::mutex locking_mutex{}; + std::condition_variable cond_var{}; + + friend class document_stream; +}; +#endif // SIMDJSON_THREADS_ENABLED + +/** + * A forward-only stream of documents. + * + * Produced by parser::iterate_many. + * + */ +class document_stream { +public: + /** + * Construct an uninitialized document_stream. + * + * ```c++ + * document_stream docs; + * auto error = parser.iterate_many(json).get(docs); + * ``` + */ + simdjson_inline document_stream() noexcept; + /** Move one document_stream to another. */ + simdjson_inline document_stream(document_stream &&other) noexcept = default; + /** Move one document_stream to another. */ + simdjson_inline document_stream &operator=(document_stream &&other) noexcept = default; + + simdjson_inline ~document_stream() noexcept; + + /** + * Returns the input size in bytes. + */ + inline size_t size_in_bytes() const noexcept; + + /** + * After iterating through the stream, this method + * returns the number of bytes that were not parsed at the end + * of the stream. 
If truncated_bytes() differs from zero, + * then the input was truncated maybe because incomplete JSON + * documents were found at the end of the stream. You + * may need to process the bytes in the interval [size_in_bytes()-truncated_bytes(), size_in_bytes()). + * + * You should only call truncated_bytes() after streaming through all + * documents, like so: + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto & doc : stream) { + * // do something with doc + * } + * size_t truncated = stream.truncated_bytes(); + * + */ + inline size_t truncated_bytes() const noexcept; + + class iterator { + public: + using value_type = simdjson_result; + using reference = value_type; + + using difference_type = std::ptrdiff_t; + + using iterator_category = std::input_iterator_tag; + + /** + * Default constructor. + */ + simdjson_inline iterator() noexcept; + /** + * Get the current document (or error). + */ + simdjson_inline simdjson_result operator*() noexcept; + /** + * Advance to the next document (prefix). + */ + inline iterator& operator++() noexcept; + /** + * Check if we're at the end yet. + * @param other the end iterator to compare to. + */ + simdjson_inline bool operator!=(const iterator &other) const noexcept; + /** + * @private + * + * Gives the current index in the input document in bytes. + * + * document_stream stream = parser.parse_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * auto doc = *i; + * size_t index = i.current_index(); + * } + * + * This function (current_index()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + */ + simdjson_inline size_t current_index() const noexcept; + + /** + * @private + * + * Gives a view of the current document at the current position. + * + * document_stream stream = parser.iterate_many(json,window); + * for(auto i = stream.begin(); i != stream.end(); ++i) { + * std::string_view v = i.source(); + * } + * + * The returned string_view instance is simply a map to the (unparsed) + * source string: it may thus include white-space characters and all manner + * of padding. + * + * This function (source()) is experimental and the usage + * may change in future versions of simdjson: we find the API somewhat + * awkward and we would like to offer something friendlier. + * + */ + simdjson_inline std::string_view source() const noexcept; + + /** + * Returns error of the stream (if any). + */ + inline error_code error() const noexcept; + + private: + simdjson_inline iterator(document_stream *s, bool finished) noexcept; + /** The document_stream we're iterating through. */ + document_stream* stream; + /** Whether we're finished or not. */ + bool finished; + + friend class document; + friend class document_stream; + friend class json_iterator; + }; + + /** + * Start iterating the documents in the stream. + */ + simdjson_inline iterator begin() noexcept; + /** + * The end of the stream, for iterator comparison purposes. + */ + simdjson_inline iterator end() noexcept; + +private: + + document_stream &operator=(const document_stream &) = delete; // Disallow copying + document_stream(const document_stream &other) = delete; // Disallow copying + + /** + * Construct a document_stream. Does not allocate or parse anything until the iterator is + * used. 
+ * + * @param parser is a reference to the parser instance used to generate this document_stream + * @param buf is the raw byte buffer we need to process + * @param len is the length of the raw byte buffer in bytes + * @param batch_size is the size of the windows (must be strictly greater or equal to the largest JSON document) + */ + simdjson_inline document_stream( + ondemand::parser &parser, + const uint8_t *buf, + size_t len, + size_t batch_size, + bool allow_comma_separated + ) noexcept; + + /** + * Parse the first document in the buffer. Used by begin(), to handle allocation and + * initialization. + */ + inline void start() noexcept; + + /** + * Parse the next document found in the buffer previously given to document_stream. + * + * The content should be a valid JSON document encoded as UTF-8. If there is a + * UTF-8 BOM, the caller is responsible for omitting it, UTF-8 BOM are + * discouraged. + * + * You do NOT need to pre-allocate a parser. This function takes care of + * pre-allocating a capacity defined by the batch_size defined when creating the + * document_stream object. + * + * The function returns simdjson::EMPTY if there is no more data to be parsed. + * + * The function returns simdjson::SUCCESS (as integer = 0) in case of success + * and indicates that the buffer has successfully been parsed to the end. + * Every document it contained has been parsed without error. + * + * The function returns an error code from simdjson/simdjson.h in case of failure + * such as simdjson::CAPACITY, simdjson::MEMALLOC, simdjson::DEPTH_ERROR and so forth; + * the simdjson::error_message function converts these error codes into a string). + * + * You can also check validity by calling parser.is_valid(). The same parser can + * and should be reused for the other documents in the buffer. + */ + inline void next() noexcept; + + /** Move the json_iterator of the document to the location of the next document in the stream. */ + inline void next_document() noexcept; + + /** Get the next document index. */ + inline size_t next_batch_start() const noexcept; + + /** Pass the next batch through stage 1 with the given parser. */ + inline error_code run_stage1(ondemand::parser &p, size_t batch_start) noexcept; + + // Fields + ondemand::parser *parser; + const uint8_t *buf; + size_t len; + size_t batch_size; + bool allow_comma_separated; + /** + * We are going to use just one document instance. The document owns + * the json_iterator. It implies that we only ever pass a reference + * to the document to the users. + */ + document doc{}; + /** The error (or lack thereof) from the current document. */ + error_code error; + size_t batch_start{0}; + size_t doc_index{}; + + #ifdef SIMDJSON_THREADS_ENABLED + /** Indicates whether we use threads. Note that this needs to be a constant during the execution of the parsing. */ + bool use_thread; + + inline void load_from_stage1_thread() noexcept; + + /** Start a thread to run stage 1 on the next batch. */ + inline void start_stage1_thread() noexcept; + + /** Wait for the stage 1 thread to finish and capture the results. */ + inline void finish_stage1_thread() noexcept; + + /** The error returned from the stage 1 thread. */ + error_code stage1_thread_error{UNINITIALIZED}; + /** The thread used to run stage 1 against the next batch in the background. */ + std::unique_ptr worker{new(std::nothrow) stage1_worker()}; + /** + * The parser used to run stage 1 in the background. Will be swapped + * with the regular parser when finished. 
+ */ + ondemand::parser stage1_thread_parser{}; + + friend struct stage1_worker; + #endif // SIMDJSON_THREADS_ENABLED + + friend class parser; + friend class document; + friend class json_iterator; + friend struct simdjson_result; + friend struct internal::simdjson_result_base; +}; // document_stream + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::document_stream &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_H +/* end file simdjson/generic/ondemand/document_stream.h for westmere */ +/* including simdjson/generic/ondemand/field.h for westmere: #include "simdjson/generic/ondemand/field.h" */ +/* begin file simdjson/generic/ondemand/field.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A JSON field (key/value pair) in an object. + * + * Returned from object iteration. + * + * Extends from std::pair so you can use C++ algorithms that rely on pairs. + */ +class field : public std::pair { +public: + /** + * Create a new invalid field. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline field() noexcept; + + /** + * Get the key as a string_view (for higher speed, consider raw_key). + * We deliberately use a more cumbersome name (unescaped_key) to force users + * to think twice about using it. + * + * This consumes the key: once you have called unescaped_key(), you cannot + * call it again nor can you call key(). + */ + simdjson_inline simdjson_warn_unused simdjson_result unescaped_key(bool allow_replacement) noexcept; + /** + * Get the key as a raw_json_string. Can be used for direct comparison with + * an unescaped C string: e.g., key() == "test". + */ + simdjson_inline raw_json_string key() const noexcept; + /** + * Get the field value. 
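+ *
+ * Illustrative sketch of field iteration (assumes exceptions are enabled and
+ * `obj` is an ondemand::object):
+ *
+ * ```c++
+ * int64_t sum = 0;
+ * for (auto f : obj) {
+ *   std::string_view key = f.unescaped_key();   // consumes the key
+ *   int64_t v = f.value();                      // the field value, consumed here
+ *   if (!key.empty()) { sum += v; }
+ * }
+ * ```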
+ */ + simdjson_inline ondemand::value &value() & noexcept; + /** + * @overload ondemand::value &ondemand::value() & noexcept + */ + simdjson_inline ondemand::value value() && noexcept; + +protected: + simdjson_inline field(raw_json_string key, ondemand::value &&value) noexcept; + static simdjson_inline simdjson_result start(value_iterator &parent_iter) noexcept; + static simdjson_inline simdjson_result start(const value_iterator &parent_iter, raw_json_string key) noexcept; + friend struct simdjson_result; + friend class object_iterator; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::field &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result unescaped_key(bool allow_replacement = false) noexcept; + simdjson_inline simdjson_result key() noexcept; + simdjson_inline simdjson_result value() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_H +/* end file simdjson/generic/ondemand/field.h for westmere */ +/* including simdjson/generic/ondemand/object.h for westmere: #include "simdjson/generic/ondemand/object.h" */ +/* begin file simdjson/generic/ondemand/object.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +/** + * A forward-only JSON object field iterator. + */ +class object { +public: + /** + * Create a new invalid object. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + /** + * Look up a field by name on an object (order-sensitive). + * + * The following code reads z, then y, then x, and thus will not retrieve x or y if fed the + * JSON `{ "x": 1, "y": 2, "z": 3 }`: + * + * ```c++ + * simdjson::ondemand::parser parser; + * auto obj = parser.parse(R"( { "x": 1, "y": 2, "z": 3 } )"_padded); + * double z = obj.find_field("z"); + * double y = obj.find_field("y"); + * double x = obj.find_field("x"); + * ``` + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * **Raw Keys:** The lookup will be done against the *raw* key, and will not unescape keys. + * e.g. `object["a"]` will match `{ "a": 1 }`, but will *not* match `{ "\u0061": 1 }`. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. 
The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a + * key a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() + * is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. + */ + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + + /** + * Look up a field by name on an object, without regard to key order. + * + * **Performance Notes:** This is a bit less performant than find_field(), though its effect varies + * and often appears negligible. It starts out normally, starting out at the last field; but if + * the field is not found, it scans from the beginning of the object to see if it missed it. That + * missing case has a non-cache-friendly bump and lots of extra scanning, especially if the object + * in question is large. The fact that the extra code is there also bumps the executable size. + * + * It is the default, however, because it would be highly surprising (and hard to debug) if the + * default behavior failed to look up a field just because it was in the wrong order--and many + * APIs assume this. Therefore, you must be explicit if you want to treat objects as out of order. + * + * Use find_field() if you are sure fields will be in order (or are willing to treat it as if the + * field wasn't there when they aren't). + * + * If you have multiple fields with a matching key ({"x": 1, "x": 1}) be mindful + * that only one field is returned. + * + * You must consume the fields on an object one at a time. A request for a new key + * invalidates previous field values: it makes them unsafe. The value instance you get + * from `content["bids"]` becomes invalid when you call `content["asks"]`. The array + * given by content["bids"].get_array() should not be accessed after you have called + * content["asks"].get_array(). You can detect such mistakes by first compiling and running + * the code in Debug mode (or with the macro `SIMDJSON_DEVELOPMENT_CHECKS` set to 1): an + * OUT_OF_ORDER_ITERATION error is generated. + * + * You are expected to access keys only once. You should access the value corresponding to a key + * a single time. Doing object["mykey"].to_string() and then again object["mykey"].to_string() is an error. + * + * @param key The key to look up. + * @returns The value of the field, or NO_SUCH_FIELD if the field is not in the object. 
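+ *
+ * A minimal lookup sketch (illustrative only; assumes exceptions are enabled):
+ *
+ * ```c++
+ * simdjson::ondemand::parser parser;
+ * auto json = R"( { "b": 2, "a": 1 } )"_padded;
+ * auto doc = parser.iterate(json);
+ * simdjson::ondemand::object obj = doc.get_object();
+ * int64_t a = obj["a"];   // order-insensitive lookup
+ * int64_t b = obj["b"];   // wraps around to find earlier fields
+ * ```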
+ */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + /** @overload simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; */ + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + + /** + * Get the value associated with the given JSON pointer. We use the RFC 6901 + * https://tools.ietf.org/html/rfc6901 standard, interpreting the current node + * as the root of its own JSON document. + * + * ondemand::parser parser; + * auto json = R"({ "foo": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("/foo/a/1") == 20 + * + * It is allowed for a key to be the empty string: + * + * ondemand::parser parser; + * auto json = R"({ "": { "a": [ 10, 20, 30 ] }})"_padded; + * auto doc = parser.iterate(json); + * doc.at_pointer("//a/1") == 20 + * + * Note that at_pointer() called on the document automatically calls the document's rewind + * method between each call. It invalidates all previously accessed arrays, objects and values + * that have not been consumed. Yet it is not the case when calling at_pointer on an object + * instance: there is no rewind and no invalidation. + * + * You may call at_pointer more than once on an object, but each time the pointer is advanced + * to be within the value matched by the key indicated by the JSON pointer query. Thus any preceding + * key (as well as the current key) can no longer be used with following JSON pointer calls. + * + * Also note that at_pointer() relies on find_field() which implies that we do not unescape keys when matching. + * + * @return The value associated with the given JSON pointer, or: + * - NO_SUCH_FIELD if a field does not exist in an object + * - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length + * - INCORRECT_TYPE if a non-integer is used to access an array + * - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed + */ + inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + + /** + * Reset the iterator so that we are pointing back at the + * beginning of the object. You should still consume values only once even if you + * can iterate through the object more than once. If you unescape a string within + * the object more than once, you have unsafe code. Note that rewinding an object + * means that you may need to reparse it anew: it is not a free operation. + * + * @returns true if the object contains some elements (not empty) + */ + inline simdjson_result reset() & noexcept; + /** + * This method scans the beginning of the object and checks whether the + * object is empty. + * The runtime complexity is constant time. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + */ + inline simdjson_result is_empty() & noexcept; + /** + * This method scans the object and counts the number of key-value pairs. 
+ * The count_fields method should always be called before you have begun + * iterating through the object: it is expected that you are pointing at + * the beginning of the object. + * The runtime complexity is linear in the size of the object. After + * calling this function, if successful, the object is 'rewinded' at its + * beginning as if it had never been accessed. If the JSON is malformed (e.g., + * there is a missing comma), then an error is returned and it is no longer + * safe to continue. + * + * To check that an object is empty, it is more performant to use + * the is_empty() method. + * + * Performance hint: You should only call count_fields() as a last + * resort as it may require scanning the document twice or more. + */ + simdjson_inline simdjson_result count_fields() & noexcept; + /** + * Consumes the object and returns a string_view instance corresponding to the + * object as represented in JSON. It points inside the original byte array containing + * the JSON document. + */ + simdjson_inline simdjson_result raw_json() noexcept; + +protected: + /** + * Go to the end of the object, no matter where you are right now. + */ + simdjson_inline error_code consume() noexcept; + static simdjson_inline simdjson_result start(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result start_root(value_iterator &iter) noexcept; + static simdjson_inline simdjson_result started(value_iterator &iter) noexcept; + static simdjson_inline object resume(const value_iterator &iter) noexcept; + simdjson_inline object(const value_iterator &iter) noexcept; + + simdjson_warn_unused simdjson_inline error_code find_field_raw(const std::string_view key) noexcept; + + value_iterator iter{}; + + friend class value; + friend class document; + friend struct simdjson_result; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::object &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + simdjson_inline simdjson_result begin() noexcept; + simdjson_inline simdjson_result end() noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field(std::string_view key) && noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) & noexcept; + simdjson_inline simdjson_result find_field_unordered(std::string_view key) && noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) & noexcept; + simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; + simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; + inline simdjson_result reset() noexcept; + inline simdjson_result is_empty() noexcept; + inline simdjson_result count_fields() & noexcept; + inline simdjson_result raw_json() noexcept; + +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_H +/* end file simdjson/generic/ondemand/object.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator.h for westmere: #include "simdjson/generic/ondemand/object_iterator.h" */ +/* begin file simdjson/generic/ondemand/object_iterator.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H + +/* amalgamation skipped 
(editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +class object_iterator { +public: + /** + * Create a new invalid object_iterator. + * + * Exists so you can declare a variable and later assign to it before use. + */ + simdjson_inline object_iterator() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result operator*() noexcept; + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const object_iterator &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. + simdjson_inline bool operator!=(const object_iterator &) const noexcept; + // Checks for ']' and ',' + simdjson_inline object_iterator &operator++() noexcept; + +private: + /** + * The underlying JSON iterator. + * + * PERF NOTE: expected to be elided in favor of the parent document: this is set when the object + * is first used, and never changes afterwards. + */ + value_iterator iter{}; + + simdjson_inline object_iterator(const value_iterator &iter) noexcept; + friend struct simdjson_result; + friend class object; +}; + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +template<> +struct simdjson_result : public westmere::implementation_simdjson_result_base { +public: + simdjson_inline simdjson_result(westmere::ondemand::object_iterator &&value) noexcept; ///< @private + simdjson_inline simdjson_result(error_code error) noexcept; ///< @private + simdjson_inline simdjson_result() noexcept = default; + + // + // Iterator interface + // + + // Reads key and value, yielding them to the user. + simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + // Assumes it's being compared with the end. true if depth < iter->depth. + simdjson_inline bool operator==(const simdjson_result &) const noexcept; + // Assumes it's being compared with the end. true if depth >= iter->depth. 
+ simdjson_inline bool operator!=(const simdjson_result &) const noexcept; + // Checks for ']' and ',' + simdjson_inline simdjson_result &operator++() noexcept; +}; + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_H +/* end file simdjson/generic/ondemand/object_iterator.h for westmere */ +/* including simdjson/generic/ondemand/serialization.h for westmere: #include "simdjson/generic/ondemand/serialization.h" */ +/* begin file simdjson/generic/ondemand/serialization.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +/** + * Create a string-view instance out of a document instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept; +/** + * Create a string-view instance out of a value instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. The value must + * not have been accessed previously. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept; +/** + * Create a string-view instance out of an object instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept; +/** + * Create a string-view instance out of an array instance. The string-view instance + * contains JSON text that is suitable to be parsed as JSON again. It does not + * validate the content. + */ +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept; +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +inline simdjson_result to_json_string(simdjson_result x); +} // namespace simdjson + +/** + * We want to support argument-dependent lookup (ADL). + * Hence we should define operator<< in the namespace + * where the argument (here value, object, etc.) resides. + * Credit: @madhur4127 + * See https://github.com/simdjson/simdjson/issues/1768 + */ +namespace simdjson { namespace westmere { namespace ondemand { + +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The element. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. 
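+ *
+ * Illustrative sketch (editorial example, not taken from the simdjson sources),
+ * assuming exceptions are enabled:
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"([1, 2, 3])"_padded;
+ *   auto doc = parser.iterate(json);
+ *   std::cout << doc.get_array() << std::endl; // prints the raw JSON text of the array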
+ */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The array. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x); +#endif +/** + * Print JSON to an output stream. It does not + * validate the content. + * + * @param out The output stream. + * @param value The object. + * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + */ +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value); +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +#endif +}}} // namespace simdjson::westmere::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_H +/* end file simdjson/generic/ondemand/serialization.h for westmere */ + +// Inline definitions +/* including simdjson/generic/ondemand/array-inl.h for westmere: #include "simdjson/generic/ondemand/array-inl.h" */ +/* begin file simdjson/generic/ondemand/array-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter->depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the array is first found and the iterator is just past the `{`. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the `,` before the next value (or `]`). In this state, +// depth == iter->depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter->depth, at_start == false, and error == SUCCESS. 
+// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter->depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the array iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet element may be missing or not be an +// array--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter->depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between elements, +// we flag that as an error and treat it exactly the same as a Chained Error. In this state, +// error == TAPE_ERROR, iter->depth == depth, and at_start == false. +// +// ## Terminal State +// +// The terminal state has iter->depth < depth. at_start is always false. +// +// - Finished: When we have reached a `]` or have reported an error, we are finished. We signal this +// by decrementing depth. In this state, iter->depth < depth, at_start == false, and +// error == SUCCESS. +// + +simdjson_inline array::array(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result array::start(value_iterator &iter) noexcept { + // We don't need to know if the array is empty to start iteration, but we do want to know if there + // is an error--thus `simdjson_unused`. + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::start_root(value_iterator &iter) noexcept { + simdjson_unused bool has_value; + SIMDJSON_TRY( iter.start_root_array().get(has_value) ); + return array(iter); +} +simdjson_inline simdjson_result array::started(value_iterator &iter) noexcept { + bool has_value; + SIMDJSON_TRY(iter.started_array().get(has_value)); + return array(iter); +} + +simdjson_inline simdjson_result array::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return array_iterator(iter); +} +simdjson_inline simdjson_result array::end() noexcept { + return array_iterator(iter); +} +simdjson_inline error_code array::consume() noexcept { + auto error = iter.json_iter().skip_child(iter.depth()-1); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result array::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter._json_iter->unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline simdjson_result array::count_elements() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the array after counting the number of elements. 
+ iter.reset_array(); + return count; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline simdjson_result array::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_array().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +inline simdjson_result array::reset() & noexcept { + return iter.reset_array(); +} + +inline simdjson_result array::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + // - means "the append position" or "the element after the end of the array" + // We don't support this, because we're returning a real element, not a position. + if (json_pointer == "-") { return INDEX_OUT_OF_BOUNDS; } + + // Read the array index + size_t array_index = 0; + size_t i; + for (i = 0; i < json_pointer.length() && json_pointer[i] != '/'; i++) { + uint8_t digit = uint8_t(json_pointer[i] - '0'); + // Check for non-digit in array index. If it's there, we're trying to get a field in an object + if (digit > 9) { return INCORRECT_TYPE; } + array_index = array_index*10 + digit; + } + + // 0 followed by other digits is invalid + if (i > 1 && json_pointer[0] == '0') { return INVALID_JSON_POINTER; } // "JSON pointer array index has other characters after 0" + + // Empty string is invalid; so is a "/" with no digits before it + if (i == 0) { return INVALID_JSON_POINTER; } // "Empty string in JSON pointer array index" + // Get the child + auto child = at(array_index); + // If there is an error, it ends here + if(child.error()) { + return child; + } + + // If there is a /, we're not done yet, call recursively. + if (i < json_pointer.length()) { + child = child.at_pointer(json_pointer.substr(i)); + } + return child; +} + +simdjson_inline simdjson_result array::at(size_t index) noexcept { + size_t i = 0; + for (auto value : *this) { + if (i == index) { return value; } + i++; + } + return INDEX_OUT_OF_BOUNDS; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array &&value +) noexcept + : implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept + : implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::is_empty() & noexcept { + if (error()) { return error(); } + return first.is_empty(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_INL_H +/* end file simdjson/generic/ondemand/array-inl.h for westmere */ +/* 
including simdjson/generic/ondemand/array_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/array_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline array_iterator::array_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result array_iterator::operator*() noexcept { + if (iter.error()) { iter.abandon(); return iter.error(); } + return value(iter.child()); +} +simdjson_inline bool array_iterator::operator==(const array_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool array_iterator::operator!=(const array_iterator &) const noexcept { + return iter.is_open(); +} +simdjson_inline array_iterator &array_iterator::operator++() noexcept { + error_code error; + // PERF NOTE this is a safety rail ... users should exit loops as soon as they receive an error, so we'll never get here. + // However, it does not seem to make a perf difference, so we add it out of an abundance of caution. 
+ if (( error = iter.error() )) { return *this; } + if (( error = iter.skip_child() )) { return *this; } + if (( error = iter.has_next_element().error() )) { return *this; } + return *this; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::array_iterator &&value +) noexcept + : westmere::implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : westmere::implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++(first); + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/array_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/document-inl.h for westmere: #include "simdjson/generic/ondemand/document-inl.h" */ +/* begin file simdjson/generic/ondemand/document-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline document::document(ondemand::json_iterator &&_iter) noexcept + : iter{std::forward(_iter)} +{ + logger::log_start_value(iter, "document"); +} + +simdjson_inline document document::start(json_iterator &&iter) noexcept { + return document(std::forward(iter)); +} + +inline void document::rewind() noexcept { + iter.rewind(); +} + +inline std::string document::to_debug_string() noexcept { + return iter.to_string(); +} + +inline simdjson_result document::current_location() const noexcept { + 
return iter.current_location(); +} + +inline int32_t document::current_depth() const noexcept { + return iter.depth(); +} + +inline bool document::at_end() const noexcept { + return iter.at_end(); +} + + +inline bool document::is_alive() noexcept { + return iter.is_alive(); +} +simdjson_inline value_iterator document::resume_value_iterator() noexcept { + return value_iterator(&iter, 1, iter.root_position()); +} +simdjson_inline value_iterator document::get_root_value_iterator() noexcept { + return resume_value_iterator(); +} +simdjson_inline simdjson_result document::start_or_resume_object() noexcept { + if (iter.at_root()) { + return get_object(); + } else { + return object::resume(resume_value_iterator()); + } +} +simdjson_inline simdjson_result document::get_value() noexcept { + // Make sure we start any arrays or objects before returning, so that start_root_() + // gets called. + iter.assert_at_document_depth(); + switch (*iter.peek()) { + case '[': { + // The following lines check that the document ends with ]. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_array(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + case '{': { + // The following lines would check that the document ends with }. + auto value_iterator = get_root_value_iterator(); + auto error = value_iterator.check_root_object(); + if(error) { return error; } + return value(get_root_value_iterator()); + } + default: + // Unfortunately, scalar documents are a special case in simdjson and they cannot + // be safely converted to value instances. + return SCALAR_DOCUMENT_AS_VALUE; + } +} +simdjson_inline simdjson_result document::get_array() & noexcept { + auto value = get_root_value_iterator(); + return array::start_root(value); +} +simdjson_inline simdjson_result document::get_object() & noexcept { + auto value = get_root_value_iterator(); + return object::start_root(value); +} + +/** + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. We want to disallow trailing + * content. + * Thus, in several implementations below, we pass a 'true' parameter value to + * a get_root_value_iterator() method: this indicates that we disallow trailing content. 
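+ *
+ * Illustrative sketch (editorial example, not taken from the simdjson sources) of the
+ * behaviour described above:
+ *
+ *   ondemand::parser parser;
+ *   auto json = R"(1.233 blabla)"_padded;
+ *   auto doc = parser.iterate(json);
+ *   double d;
+ *   auto error = doc.get_double().get(d); // expected to report an error because of the
+ *                                         // trailing content, rather than returning 1.233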
+ */ + +simdjson_inline simdjson_result document::get_uint64() noexcept { + return get_root_value_iterator().get_root_uint64(true); +} +simdjson_inline simdjson_result document::get_uint64_in_string() noexcept { + return get_root_value_iterator().get_root_uint64_in_string(true); +} +simdjson_inline simdjson_result document::get_int64() noexcept { + return get_root_value_iterator().get_root_int64(true); +} +simdjson_inline simdjson_result document::get_int64_in_string() noexcept { + return get_root_value_iterator().get_root_int64_in_string(true); +} +simdjson_inline simdjson_result document::get_double() noexcept { + return get_root_value_iterator().get_root_double(true); +} +simdjson_inline simdjson_result document::get_double_in_string() noexcept { + return get_root_value_iterator().get_root_double_in_string(true); +} +simdjson_inline simdjson_result document::get_string(bool allow_replacement) noexcept { + return get_root_value_iterator().get_root_string(true, allow_replacement); +} +simdjson_inline simdjson_result document::get_wobbly_string() noexcept { + return get_root_value_iterator().get_root_wobbly_string(true); +} +simdjson_inline simdjson_result document::get_raw_json_string() noexcept { + return get_root_value_iterator().get_root_raw_json_string(true); +} +simdjson_inline simdjson_result document::get_bool() noexcept { + return get_root_value_iterator().get_root_bool(true); +} +simdjson_inline simdjson_result document::is_null() noexcept { + return get_root_value_iterator().is_root_null(true); +} + +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_array(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_object(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_double(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_bool(); } +template<> simdjson_inline simdjson_result document::get() & noexcept { return get_value(); } + +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_double(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_uint64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_int64(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return std::forward(*this).get_bool(); } +template<> simdjson_inline simdjson_result document::get() && noexcept { return get_value(); } + +template simdjson_inline error_code document::get(T &out) & noexcept { + return get().get(out); +} +template simdjson_inline error_code document::get(T &out) && noexcept { + return std::forward(*this).get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document::operator array() & noexcept(false) { return get_array(); } +simdjson_inline document::operator 
object() & noexcept(false) { return get_object(); } +simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document::operator double() noexcept(false) { return get_double(); } +simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document::operator value() noexcept(false) { return get_value(); } + +#endif +simdjson_inline simdjson_result document::count_elements() & noexcept { + auto a = get_array(); + simdjson_result answer = a.count_elements(); + /* If there was an array, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::count_fields() & noexcept { + auto a = get_object(); + simdjson_result answer = a.count_fields(); + /* If there was an object, we are now left pointing at its first element. */ + if(answer.error() == SUCCESS) { rewind(); } + return answer; +} +simdjson_inline simdjson_result document::at(size_t index) & noexcept { + auto a = get_array(); + return a.at(index); +} +simdjson_inline simdjson_result document::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result document::end() & noexcept { + return {}; +} + +simdjson_inline simdjson_result document::find_field(std::string_view key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field(const char *key) & noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result document::find_field_unordered(std::string_view key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::find_field_unordered(const char *key) & noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result document::operator[](std::string_view key) & noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result document::operator[](const char *key) & noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline error_code document::consume() noexcept { + auto error = iter.skip_child(0); + if(error) { iter.abandon(); } + return error; +} + +simdjson_inline simdjson_result document::raw_json() noexcept { + auto _iter = get_root_value_iterator(); + const uint8_t * starting_point{_iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + // After 'consume()', we could be left pointing just beyond the document, but that + // is ok because we are not going to dereference the final pointer position, we just + // use it to compute the length in bytes. + const uint8_t * final_point{iter.unsafe_pointer()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result document::type() noexcept { + return get_root_value_iterator().type(); +} + +simdjson_inline simdjson_result document::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! 
((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool document::is_negative() noexcept { + return get_root_value_iterator().is_root_negative(); +} + +simdjson_inline simdjson_result document::is_integer() noexcept { + return get_root_value_iterator().is_root_integer(true); +} + +simdjson_inline simdjson_result document::get_number_type() noexcept { + return get_root_value_iterator().get_root_number_type(true); +} + +simdjson_inline simdjson_result document::get_number() noexcept { + return get_root_value_iterator().get_root_number(true); +} + + +simdjson_inline simdjson_result document::raw_json_token() noexcept { + auto _iter = get_root_value_iterator(); + return std::string_view(reinterpret_cast(_iter.peek_start()), _iter.peek_start_length()); +} + +simdjson_inline simdjson_result document::at_pointer(std::string_view json_pointer) noexcept { + rewind(); // Rewind the document each time at_pointer is called + if (json_pointer.empty()) { + return this->get_value(); + } + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base( + error + ) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result 
simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template +simdjson_inline simdjson_result simdjson_result::get() & noexcept { + if (error()) { return error(); } + return first.get(); +} +template +simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first).get(); +} +template +simdjson_inline error_code simdjson_result::get(T &out) & noexcept { + if (error()) { return error(); } + return first.get(out); +} +template +simdjson_inline error_code simdjson_result::get(T &out) && noexcept { + if (error()) { return error(); } + return std::forward(first).get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() & noexcept = delete; +template<> simdjson_inline simdjson_result simdjson_result::get() && noexcept { + if (error()) { return error(); } + return std::forward(first); +} +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) & noexcept = delete; +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::document &out) && noexcept { + if (error()) { return error(); } + out = std::forward(first); + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} + +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} + + +simdjson_inline bool simdjson_result::is_negative() noexcept { + if 
(error()) { return error(); } + return first.is_negative(); +} + +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} + +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} + +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} + + +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline bool simdjson_result::at_end() const noexcept { + if (error()) { return error(); } + return first.at_end(); +} + + +simdjson_inline int32_t simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline document_reference::document_reference() noexcept : doc{nullptr} {} +simdjson_inline document_reference::document_reference(document &d) noexcept : doc(&d) {} +simdjson_inline void document_reference::rewind() noexcept { doc->rewind(); } +simdjson_inline simdjson_result document_reference::get_array() & noexcept { return doc->get_array(); } +simdjson_inline simdjson_result document_reference::get_object() & noexcept { return doc->get_object(); } +/** + * The document_reference instances are used primarily/solely for streams of JSON + * documents. + * We decided that calling 'get_double()' on the JSON document '1.233 blabla' should + * give an error, so we check for trailing content. 
+ * + * However, for streams of JSON documents, we want to be able to start from + * "321" "321" "321" + * and parse it successfully as a stream of JSON documents, calling get_uint64_in_string() + * successfully each time. + * + * To achieve this result, we pass a 'false' to a get_root_value_iterator() method: + * this indicates that we allow trailing content. + */ +simdjson_inline simdjson_result document_reference::get_uint64() noexcept { return doc->get_root_value_iterator().get_root_uint64(false); } +simdjson_inline simdjson_result document_reference::get_uint64_in_string() noexcept { return doc->get_root_value_iterator().get_root_uint64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_int64() noexcept { return doc->get_root_value_iterator().get_root_int64(false); } +simdjson_inline simdjson_result document_reference::get_int64_in_string() noexcept { return doc->get_root_value_iterator().get_root_int64_in_string(false); } +simdjson_inline simdjson_result document_reference::get_double() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_double_in_string() noexcept { return doc->get_root_value_iterator().get_root_double(false); } +simdjson_inline simdjson_result document_reference::get_string(bool allow_replacement) noexcept { return doc->get_root_value_iterator().get_root_string(false, allow_replacement); } +simdjson_inline simdjson_result document_reference::get_wobbly_string() noexcept { return doc->get_root_value_iterator().get_root_wobbly_string(false); } +simdjson_inline simdjson_result document_reference::get_raw_json_string() noexcept { return doc->get_root_value_iterator().get_root_raw_json_string(false); } +simdjson_inline simdjson_result document_reference::get_bool() noexcept { return doc->get_root_value_iterator().get_root_bool(false); } +simdjson_inline simdjson_result document_reference::get_value() noexcept { return doc->get_value(); } +simdjson_inline simdjson_result document_reference::is_null() noexcept { return doc->get_root_value_iterator().is_root_null(false); } + +#if SIMDJSON_EXCEPTIONS +simdjson_inline document_reference::operator array() & noexcept(false) { return array(*doc); } +simdjson_inline document_reference::operator object() & noexcept(false) { return object(*doc); } +simdjson_inline document_reference::operator uint64_t() noexcept(false) { return get_uint64(); } +simdjson_inline document_reference::operator int64_t() noexcept(false) { return get_int64(); } +simdjson_inline document_reference::operator double() noexcept(false) { return get_double(); } +simdjson_inline document_reference::operator std::string_view() noexcept(false) { return std::string_view(*doc); } +simdjson_inline document_reference::operator raw_json_string() noexcept(false) { return raw_json_string(*doc); } +simdjson_inline document_reference::operator bool() noexcept(false) { return get_bool(); } +simdjson_inline document_reference::operator value() noexcept(false) { return value(*doc); } +#endif +simdjson_inline simdjson_result document_reference::count_elements() & noexcept { return doc->count_elements(); } +simdjson_inline simdjson_result document_reference::count_fields() & noexcept { return doc->count_fields(); } +simdjson_inline simdjson_result document_reference::at(size_t index) & noexcept { return doc->at(index); } +simdjson_inline simdjson_result document_reference::begin() & noexcept { return doc->begin(); } +simdjson_inline simdjson_result 
document_reference::end() & noexcept { return doc->end(); } +simdjson_inline simdjson_result document_reference::find_field(std::string_view key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::find_field(const char *key) & noexcept { return doc->find_field(key); } +simdjson_inline simdjson_result document_reference::operator[](std::string_view key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::operator[](const char *key) & noexcept { return (*doc)[key]; } +simdjson_inline simdjson_result document_reference::find_field_unordered(std::string_view key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::find_field_unordered(const char *key) & noexcept { return doc->find_field_unordered(key); } +simdjson_inline simdjson_result document_reference::type() noexcept { return doc->type(); } +simdjson_inline simdjson_result document_reference::is_scalar() noexcept { return doc->is_scalar(); } +simdjson_inline simdjson_result document_reference::current_location() noexcept { return doc->current_location(); } +simdjson_inline int32_t document_reference::current_depth() const noexcept { return doc->current_depth(); } +simdjson_inline bool document_reference::is_negative() noexcept { return doc->is_negative(); } +simdjson_inline simdjson_result document_reference::is_integer() noexcept { return doc->get_root_value_iterator().is_root_integer(false); } +simdjson_inline simdjson_result document_reference::get_number_type() noexcept { return doc->get_root_value_iterator().get_root_number_type(false); } +simdjson_inline simdjson_result document_reference::get_number() noexcept { return doc->get_root_value_iterator().get_root_number(false); } +simdjson_inline simdjson_result document_reference::raw_json_token() noexcept { return doc->raw_json_token(); } +simdjson_inline simdjson_result document_reference::at_pointer(std::string_view json_pointer) noexcept { return doc->at_pointer(json_pointer); } +simdjson_inline simdjson_result document_reference::raw_json() noexcept { return doc->raw_json();} +simdjson_inline document_reference::operator document&() const noexcept { return *doc; } + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + + + +namespace simdjson { +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::document_reference value, error_code error) + noexcept : implementation_simdjson_result_base(std::forward(value), error) {} + + +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) & noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline error_code simdjson_result::rewind() noexcept { + if (error()) { return error(); } + first.rewind(); + return SUCCESS; +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline 
simdjson_result simdjson_result::find_field_unordered(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::get_array() & noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() & noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::get_value() noexcept { + if (error()) { return error(); } + return first.get_value(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + 
return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator westmere::ondemand::array() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() & noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::value() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_INL_H +/* end file simdjson/generic/ondemand/document-inl.h for westmere */ +/* including simdjson/generic/ondemand/document_stream-inl.h for westmere: #include "simdjson/generic/ondemand/document_stream-inl.h" */ +/* begin file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void stage1_worker::finish() { + // After calling "run" someone would call finish() to wait + // for the end of the processing. + // This function will wait until either the thread has done + // the processing or, else, the destructor has been called. 
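+  // Note: std::condition_variable::wait(lock, pred) loops internally (it behaves like
+  // `while (!pred()) wait(lock);`), so this call blocks until has_work becomes false,
+  // i.e. until run_stage1 has completed or stop_thread() has cleared the pending work.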
+ std::unique_lock lock(locking_mutex); + cond_var.wait(lock, [this]{return has_work == false;}); +} + +inline stage1_worker::~stage1_worker() { + // The thread may never outlive the stage1_worker instance + // and will always be stopped/joined before the stage1_worker + // instance is gone. + stop_thread(); +} + +inline void stage1_worker::start_thread() { + std::unique_lock lock(locking_mutex); + if(thread.joinable()) { + return; // This should never happen but we never want to create more than one thread. + } + thread = std::thread([this]{ + while(true) { + std::unique_lock thread_lock(locking_mutex); + // We wait for either "run" or "stop_thread" to be called. + cond_var.wait(thread_lock, [this]{return has_work || !can_work;}); + // If, for some reason, the stop_thread() method was called (i.e., the + // destructor of stage1_worker is called, then we want to immediately destroy + // the thread (and not do any more processing). + if(!can_work) { + break; + } + this->owner->stage1_thread_error = this->owner->run_stage1(*this->stage1_thread_parser, + this->_next_batch_start); + this->has_work = false; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify "finish" + thread_lock.unlock(); + } + } + ); +} + + +inline void stage1_worker::stop_thread() { + std::unique_lock lock(locking_mutex); + // We have to make sure that all locks can be released. + can_work = false; + has_work = false; + cond_var.notify_all(); + lock.unlock(); + if(thread.joinable()) { + thread.join(); + } +} + +inline void stage1_worker::run(document_stream * ds, parser * stage1, size_t next_batch_start) { + std::unique_lock lock(locking_mutex); + owner = ds; + _next_batch_start = next_batch_start; + stage1_thread_parser = stage1; + has_work = true; + // The condition variable call should be moved after thread_lock.unlock() for performance + // reasons but thread sanitizers may report it as a data race if we do. + // See https://stackoverflow.com/questions/35775501/c-should-condition-variable-be-notified-under-lock + cond_var.notify_one(); // will notify the thread lock that we have work + lock.unlock(); +} + +#endif // SIMDJSON_THREADS_ENABLED + +simdjson_inline document_stream::document_stream( + ondemand::parser &_parser, + const uint8_t *_buf, + size_t _len, + size_t _batch_size, + bool _allow_comma_separated +) noexcept + : parser{&_parser}, + buf{_buf}, + len{_len}, + batch_size{_batch_size <= MINIMAL_BATCH_SIZE ? 
MINIMAL_BATCH_SIZE : _batch_size}, + allow_comma_separated{_allow_comma_separated}, + error{SUCCESS} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(_parser.threaded) // we need to make a copy because _parser.threaded can change + #endif +{ +#ifdef SIMDJSON_THREADS_ENABLED + if(worker.get() == nullptr) { + error = MEMALLOC; + } +#endif +} + +simdjson_inline document_stream::document_stream() noexcept + : parser{nullptr}, + buf{nullptr}, + len{0}, + batch_size{0}, + allow_comma_separated{false}, + error{UNINITIALIZED} + #ifdef SIMDJSON_THREADS_ENABLED + , use_thread(false) + #endif +{ +} + +simdjson_inline document_stream::~document_stream() noexcept +{ + #ifdef SIMDJSON_THREADS_ENABLED + worker.reset(); + #endif +} + +inline size_t document_stream::size_in_bytes() const noexcept { + return len; +} + +inline size_t document_stream::truncated_bytes() const noexcept { + if(error == CAPACITY) { return len - batch_start; } + return parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] - parser->implementation->structural_indexes[parser->implementation->n_structural_indexes + 1]; +} + +simdjson_inline document_stream::iterator::iterator() noexcept + : stream{nullptr}, finished{true} { +} + +simdjson_inline document_stream::iterator::iterator(document_stream* _stream, bool is_end) noexcept + : stream{_stream}, finished{is_end} { +} + +simdjson_inline simdjson_result document_stream::iterator::operator*() noexcept { + //if(stream->error) { return stream->error; } + return simdjson_result(stream->doc, stream->error); +} + +simdjson_inline document_stream::iterator& document_stream::iterator::operator++() noexcept { + // If there is an error, then we want the iterator + // to be finished, no matter what. (E.g., we do not + // keep generating documents with errors, or go beyond + // a document with errors.) + // + // Users do not have to call "operator*()" when they use operator++, + // so we need to end the stream in the operator++ function. + // + // Note that setting finished = true is essential otherwise + // we would enter an infinite loop. + if (stream->error) { finished = true; } + // Note that stream->error() is guarded against error conditions + // (it will immediately return if stream->error casts to false). + // In effect, this next function does nothing when (stream->error) + // is true (hence the risk of an infinite loop). + stream->next(); + // If that was the last document, we're finished. + // It is the only type of error we do not want to appear + // in operator*. + if (stream->error == EMPTY) { finished = true; } + // If we had any other kind of error (not EMPTY) then we want + // to pass it along to the operator* and we cannot mark the result + // as "finished" just yet. + return *this; +} + +simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { + return finished != other.finished; +} + +simdjson_inline document_stream::iterator document_stream::begin() noexcept { + start(); + // If there are no documents, we're finished. 
+ return iterator(this, error == EMPTY); +} + +simdjson_inline document_stream::iterator document_stream::end() noexcept { + return iterator(this, true); +} + +inline void document_stream::start() noexcept { + if (error) { return; } + error = parser->allocate(batch_size); + if (error) { return; } + // Always run the first stage 1 parse immediately + batch_start = 0; + error = run_stage1(*parser, batch_start); + while(error == EMPTY) { + // In exceptional cases, we may start with an empty block + batch_start = next_batch_start(); + if (batch_start >= len) { return; } + error = run_stage1(*parser, batch_start); + } + if (error) { return; } + doc_index = batch_start; + doc = document(json_iterator(&buf[batch_start], parser)); + doc.iter._streaming = true; + + #ifdef SIMDJSON_THREADS_ENABLED + if (use_thread && next_batch_start() < len) { + // Kick off the first thread on next batch if needed + error = stage1_thread_parser.allocate(batch_size); + if (error) { return; } + worker->start_thread(); + start_stage1_thread(); + if (error) { return; } + } + #endif // SIMDJSON_THREADS_ENABLED +} + +inline void document_stream::next() noexcept { + // We always enter at once once in an error condition. + if (error) { return; } + next_document(); + if (error) { return; } + auto cur_struct_index = doc.iter._root - parser->implementation->structural_indexes.get(); + doc_index = batch_start + parser->implementation->structural_indexes[cur_struct_index]; + + // Check if at end of structural indexes (i.e. at end of batch) + if(cur_struct_index >= static_cast(parser->implementation->n_structural_indexes)) { + error = EMPTY; + // Load another batch (if available) + while (error == EMPTY) { + batch_start = next_batch_start(); + if (batch_start >= len) { break; } + #ifdef SIMDJSON_THREADS_ENABLED + if(use_thread) { + load_from_stage1_thread(); + } else { + error = run_stage1(*parser, batch_start); + } + #else + error = run_stage1(*parser, batch_start); + #endif + /** + * Whenever we move to another window, we need to update all pointers to make + * it appear as if the input buffer started at the beginning of the window. + * + * Take this input: + * + * {"z":5} {"1":1,"2":2,"4":4} [7, 10, 9] [15, 11, 12, 13] [154, 110, 112, 1311] + * + * Say you process the following window... + * + * '{"z":5} {"1":1,"2":2,"4":4} [7, 10, 9]' + * + * When you do so, the json_iterator has a pointer at the beginning of the memory region + * (pointing at the beginning of '{"z"...'. + * + * When you move to the window that starts at... + * + * '[7, 10, 9] [15, 11, 12, 13] ... + * + * then it is not sufficient to just run stage 1. You also need to re-anchor the + * json_iterator so that it believes we are starting at '[7, 10, 9]...'. + * + * Under the DOM front-end, this gets done automatically because the parser owns + * the pointer the data, and when you call stage1 and then stage2 on the same + * parser, then stage2 will run on the pointer acquired by stage1. + * + * That is, stage1 calls "this->buf = _buf" so the parser remembers the buffer that + * we used. But json_iterator has no callback when stage1 is called on the parser. + * In fact, I think that the parser is unaware of json_iterator. + * + * + * So we need to re-anchor the json_iterator after each call to stage 1 so that + * all of the pointers are in sync. + */ + doc.iter = json_iterator(&buf[batch_start], parser); + doc.iter._streaming = true; + /** + * End of resync. + */ + + if (error) { continue; } // If the error was EMPTY, we may want to load another batch. 
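+// Illustrative sketch (editor-added, not part of the upstream simdjson sources,
+// and excluded from compilation): the re-anchoring described above reduces to
+// index arithmetic -- stage 1 reports positions relative to the current window,
+// and absolute positions are recovered by adding batch_start, just as
+// document_stream::next() does with doc_index.
+#if 0
+#include <cstddef>
+#include <cstdint>
+
+struct window_cursor {
+  const std::uint8_t *buf;    // the full input buffer
+  std::size_t batch_start;    // where the current window begins in buf
+  // A structural index reported for the current window maps to this absolute
+  // offset in the original input:
+  std::size_t absolute(std::size_t relative_index) const {
+    return batch_start + relative_index;
+  }
+  // Moving to the next window only changes batch_start; the indexes produced by
+  // the next stage 1 run are again relative to the new window, so any consumer
+  // (here, a hypothetical iterator) must be re-anchored to buf + batch_start.
+  const std::uint8_t *window_begin() const { return buf + batch_start; }
+};
+#endif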
+ doc_index = batch_start; + } + } +} + +inline void document_stream::next_document() noexcept { + // Go to next place where depth=0 (document depth) + error = doc.iter.skip_child(0); + if (error) { return; } + // Always set depth=1 at the start of document + doc.iter._depth = 1; + // consume comma if comma separated is allowed + if (allow_comma_separated) { doc.iter.consume_character(','); } + // Resets the string buffer at the beginning, thus invalidating the strings. + doc.iter._string_buf_loc = parser->string_buf.get(); + doc.iter._root = doc.iter.position(); +} + +inline size_t document_stream::next_batch_start() const noexcept { + return batch_start + parser->implementation->structural_indexes[parser->implementation->n_structural_indexes]; +} + +inline error_code document_stream::run_stage1(ondemand::parser &p, size_t _batch_start) noexcept { + // This code only updates the structural index in the parser, it does not update any json_iterator + // instance. + size_t remaining = len - _batch_start; + if (remaining <= batch_size) { + return p.implementation->stage1(&buf[_batch_start], remaining, stage1_mode::streaming_final); + } else { + return p.implementation->stage1(&buf[_batch_start], batch_size, stage1_mode::streaming_partial); + } +} + +simdjson_inline size_t document_stream::iterator::current_index() const noexcept { + return stream->doc_index; +} + +simdjson_inline std::string_view document_stream::iterator::source() const noexcept { + auto depth = stream->doc.iter.depth(); + auto cur_struct_index = stream->doc.iter._root - stream->parser->implementation->structural_indexes.get(); + + // If at root, process the first token to determine if scalar value + if (stream->doc.iter.at_root()) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': // Depth=1 already at start of document + break; + case '}': case ']': + depth--; + break; + default: // Scalar value document + // TODO: Remove any trailing whitespaces + // This returns a string spanning from start of value to the beginning of the next document (excluded) + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[++cur_struct_index] - current_index() - 1); + } + cur_struct_index++; + } + + while (cur_struct_index <= static_cast(stream->parser->implementation->n_structural_indexes)) { + switch (stream->buf[stream->batch_start + stream->parser->implementation->structural_indexes[cur_struct_index]]) { + case '{': case '[': + depth++; + break; + case '}': case ']': + depth--; + break; + } + if (depth == 0) { break; } + cur_struct_index++; + } + + return std::string_view(reinterpret_cast(stream->buf) + current_index(), stream->parser->implementation->structural_indexes[cur_struct_index] - current_index() + stream->batch_start + 1);; +} + +inline error_code document_stream::iterator::error() const noexcept { + return stream->error; +} + +#ifdef SIMDJSON_THREADS_ENABLED + +inline void document_stream::load_from_stage1_thread() noexcept { + worker->finish(); + // Swap to the parser that was loaded up in the thread. Make sure the parser has + // enough memory to swap to, as well. + std::swap(stage1_thread_parser,*parser); + error = stage1_thread_error; + if (error) { return; } + + // If there's anything left, start the stage 1 thread! 
+ if (next_batch_start() < len) { + start_stage1_thread(); + } +} + +inline void document_stream::start_stage1_thread() noexcept { + // we call the thread on a lambda that will update + // this->stage1_thread_error + // there is only one thread that may write to this value + // TODO this is NOT exception-safe. + this->stage1_thread_error = UNINITIALIZED; // In case something goes wrong, make sure it's an error + size_t _next_batch_start = this->next_batch_start(); + + worker->run(this, & this->stage1_thread_parser, _next_batch_start); +} + +#endif // SIMDJSON_THREADS_ENABLED + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::document_stream &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} + +} + +#endif // SIMDJSON_GENERIC_ONDEMAND_DOCUMENT_STREAM_INL_H +/* end file simdjson/generic/ondemand/document_stream-inl.h for westmere */ +/* including simdjson/generic/ondemand/field-inl.h for westmere: #include "simdjson/generic/ondemand/field-inl.h" */ +/* begin file simdjson/generic/ondemand/field-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// clang 6 doesn't think the default constructor can be noexcept, so we make it explicit +simdjson_inline field::field() noexcept : std::pair() {} + +simdjson_inline field::field(raw_json_string key, ondemand::value &&value) noexcept + : std::pair(key, std::forward(value)) +{ +} + +simdjson_inline simdjson_result field::start(value_iterator &parent_iter) noexcept { + raw_json_string key; + SIMDJSON_TRY( parent_iter.field_key().get(key) ); + SIMDJSON_TRY( parent_iter.field_value() ); + return field::start(parent_iter, key); +} + +simdjson_inline simdjson_result field::start(const value_iterator &parent_iter, raw_json_string key) noexcept { + return field(key, parent_iter.child()); +} + +simdjson_inline simdjson_warn_unused simdjson_result field::unescaped_key(bool allow_replacement) noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() but Visual Studio won't let us. + simdjson_result answer = first.unescape(second.iter.json_iter(), allow_replacement); + first.consume(); + return answer; +} + +simdjson_inline raw_json_string field::key() const noexcept { + SIMDJSON_ASSUME(first.buf != nullptr); // We would like to call .alive() by Visual Studio won't let us. 
+ return first; +} + +simdjson_inline value &field::value() & noexcept { + return second; +} + +simdjson_inline value field::value() && noexcept { + return std::forward(*this).second; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::field &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} + +simdjson_inline simdjson_result simdjson_result::key() noexcept { + if (error()) { return error(); } + return first.key(); +} +simdjson_inline simdjson_result simdjson_result::unescaped_key(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.unescaped_key(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::value() noexcept { + if (error()) { return error(); } + return std::move(first.value()); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_FIELD_INL_H +/* end file simdjson/generic/ondemand/field-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline json_iterator::json_iterator(json_iterator &&other) noexcept + : token(std::forward(other.token)), + parser{other.parser}, + _string_buf_loc{other._string_buf_loc}, + error{other.error}, + _depth{other._depth}, + _root{other._root}, + _streaming{other._streaming} +{ + other.parser = nullptr; +} +simdjson_inline json_iterator &json_iterator::operator=(json_iterator &&other) noexcept { + token = other.token; + parser = other.parser; + _string_buf_loc = other._string_buf_loc; + error = other.error; + _depth = other._depth; + _root = other._root; + _streaming = other._streaming; + other.parser = nullptr; + return *this; +} + +simdjson_inline json_iterator::json_iterator(const uint8_t *buf, ondemand::parser *_parser) noexcept + : token(buf, &_parser->implementation->structural_indexes[0]), + parser{_parser}, + _string_buf_loc{parser->string_buf.get()}, + _depth{1}, + _root{parser->implementation->structural_indexes.get()}, + _streaming{false} + +{ + logger::log_headers(); 
+#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif +} + +inline void json_iterator::rewind() noexcept { + token.set_position( root_position() ); + logger::log_headers(); // We start again + _string_buf_loc = parser->string_buf.get(); + _depth = 1; +} + +inline bool json_iterator::balanced() const noexcept { + token_iterator ti(token); + int32_t count{0}; + ti.set_position( root_position() ); + while(ti.peek() <= peek_last()) { + switch (*ti.return_current_and_advance()) + { + case '[': case '{': + count++; + break; + case ']': case '}': + count--; + break; + default: + break; + } + } + return count == 0; +} + + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and parent_depth, which is a desired effect. The warning does not show up if the +// skip_child() function is not marked inline). +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline error_code json_iterator::skip_child(depth_t parent_depth) noexcept { + if (depth() <= parent_depth) { return SUCCESS; } + switch (*return_current_and_advance()) { + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + + // For the first open array/object in a value, we've already incremented depth, so keep it the same + // We never stop at colon, but if we did, it wouldn't affect depth + case '[': case '{': case ':': + logger::log_start_value(*this, "skip"); + break; + // If there is a comma, we have just finished a value in an array/object, and need to get back in + case ',': + logger::log_value(*this, "skip"); + break; + // ] or } means we just finished a value and need to jump out of the array/object + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } +#if SIMDJSON_CHECK_EOF + // If there are no more tokens, the parent is incomplete. + if (at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "Missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + break; + case '"': + if(*peek() == ':') { + // We are at a key!!! + // This might happen if you just started an object and you skip it immediately. + // Performance note: it would be nice to get rid of this check as it is somewhat + // expensive. + // https://github.com/simdjson/simdjson/issues/1742 + logger::log_value(*this, "key"); + return_current_and_advance(); // eat up the ':' + break; // important!!! + } + simdjson_fallthrough; + // Anything else must be a scalar value + default: + // For the first scalar, we will have incremented depth already, so we decrement it here. 
+ logger::log_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + } + + // Now that we've considered the first value, we only increment/decrement for arrays/objects + while (position() < end_position()) { + switch (*return_current_and_advance()) { + case '[': case '{': + logger::log_start_value(*this, "skip"); + _depth++; + break; + // TODO consider whether matching braces is a requirement: if non-matching braces indicates + // *missing* braces, then future lookups are not in the object/arrays they think they are, + // violating the rule "validate enough structure that the user can be confident they are + // looking at the right values." + // PERF TODO we can eliminate the switch here with a lookup of how much to add to depth + case ']': case '}': + logger::log_end_value(*this, "skip"); + _depth--; + if (depth() <= parent_depth) { return SUCCESS; } + break; + default: + logger::log_value(*this, "skip", ""); + break; + } + } + + return report_error(TAPE_ERROR, "not enough close braces"); +} + +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool json_iterator::at_root() const noexcept { + return position() == root_position(); +} + +simdjson_inline bool json_iterator::is_single_token() const noexcept { + return parser->implementation->n_structural_indexes == 1; +} + +simdjson_inline bool json_iterator::streaming() const noexcept { + return _streaming; +} + +simdjson_inline token_position json_iterator::root_position() const noexcept { + return _root; +} + +simdjson_inline void json_iterator::assert_at_document_depth() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +} + +simdjson_inline void json_iterator::assert_at_root() const noexcept { + SIMDJSON_ASSUME( _depth == 1 ); +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + // Under Visual Studio, the next SIMDJSON_ASSUME fails with: the argument + // has side effects that will be discarded. 
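+// Illustrative sketch (editor-added, not part of the upstream simdjson sources,
+// and excluded from compilation): the depth bookkeeping behind skip_child()
+// above, reduced to its core. Given a string of structural characters with the
+// cursor just past an opening '[' or '{', advance until the matching close.
+#if 0
+#include <cstddef>
+#include <string_view>
+
+inline bool skip_enclosed_value(std::string_view structurals, std::size_t &pos) {
+  int depth = 1;                                    // inside the value being skipped
+  while (pos < structurals.size()) {
+    switch (structurals[pos++]) {
+      case '[': case '{': depth++; break;           // entering a nested container
+      case ']': case '}':
+        if (--depth == 0) { return true; }          // back at the parent's depth
+        break;
+      default: break;                               // scalars, commas and colons do not change depth
+    }
+  }
+  return false;                                     // ran out of tokens: unbalanced input
+}
+#endif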
+ SIMDJSON_ASSUME( token.position() == _root ); +#endif +} + +simdjson_inline void json_iterator::assert_more_tokens(uint32_t required_tokens) const noexcept { + assert_valid_position(token._position + required_tokens - 1); +} + +simdjson_inline void json_iterator::assert_valid_position(token_position position) const noexcept { +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); + SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#endif +} + +simdjson_inline bool json_iterator::at_end() const noexcept { + return position() == end_position(); +} +simdjson_inline token_position json_iterator::end_position() const noexcept { + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + return &parser->implementation->structural_indexes[n_structural_indexes]; +} + +inline std::string json_iterator::to_string() const noexcept { + if( !is_alive() ) { return "dead json_iterator instance"; } + const char * current_structural = reinterpret_cast(token.peek()); + return std::string("json_iterator [ depth : ") + std::to_string(_depth) + + std::string(", structural : '") + std::string(current_structural,1) + + std::string("', offset : ") + std::to_string(token.current_offset()) + + std::string("', error : ") + error_message(error) + + std::string(" ]"); +} + +inline simdjson_result json_iterator::current_location() const noexcept { + if (!is_alive()) { // Unrecoverable error + if (!at_root()) { + return reinterpret_cast(token.peek(-1)); + } else { + return reinterpret_cast(token.peek()); + } + } + if (at_end()) { + return OUT_OF_BOUNDS; + } + return reinterpret_cast(token.peek()); +} + +simdjson_inline bool json_iterator::is_alive() const noexcept { + return parser; +} + +simdjson_inline void json_iterator::abandon() noexcept { + parser = nullptr; + _depth = 0; +} + +simdjson_inline const uint8_t *json_iterator::return_current_and_advance() noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(); +#endif // SIMDJSON_CHECK_EOF + return token.return_current_and_advance(); +} + +simdjson_inline const uint8_t *json_iterator::unsafe_pointer() const noexcept { + // deliberately done without safety guard: + return token.peek(); +} + +simdjson_inline const uint8_t *json_iterator::peek(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // SIMDJSON_CHECK_EOF + return token.peek(delta); +} + +simdjson_inline uint32_t json_iterator::peek_length(int32_t delta) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_more_tokens(delta+1); +#endif // #if SIMDJSON_CHECK_EOF + return token.peek_length(delta); +} + +simdjson_inline const uint8_t *json_iterator::peek(token_position position) const noexcept { + // todo: currently we require end-of-string buffering, but the following + // assert_valid_position should be turned on if/when we lift that condition. + // assert_valid_position(position); + // This is almost surely related to SIMDJSON_CHECK_EOF but given that SIMDJSON_CHECK_EOF + // is ON by default, we have no choice but to disable it for real with a comment. 
+ return token.peek(position); +} + +simdjson_inline uint32_t json_iterator::peek_length(token_position position) const noexcept { +#if SIMDJSON_CHECK_EOF + assert_valid_position(position); +#endif // SIMDJSON_CHECK_EOF + return token.peek_length(position); +} + +simdjson_inline token_position json_iterator::last_position() const noexcept { + // The following line fails under some compilers... + // SIMDJSON_ASSUME(parser->implementation->n_structural_indexes > 0); + // since it has side-effects. + uint32_t n_structural_indexes{parser->implementation->n_structural_indexes}; + SIMDJSON_ASSUME(n_structural_indexes > 0); + return &parser->implementation->structural_indexes[n_structural_indexes - 1]; +} +simdjson_inline const uint8_t *json_iterator::peek_last() const noexcept { + return token.peek(last_position()); +} + +simdjson_inline void json_iterator::ascend_to(depth_t parent_depth) noexcept { + SIMDJSON_ASSUME(parent_depth >= 0 && parent_depth < INT32_MAX - 1); + SIMDJSON_ASSUME(_depth == parent_depth + 1); + _depth = parent_depth; +} + +simdjson_inline void json_iterator::descend_to(depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); + _depth = child_depth; +} + +simdjson_inline depth_t json_iterator::depth() const noexcept { + return _depth; +} + +simdjson_inline uint8_t *&json_iterator::string_buf_loc() noexcept { + return _string_buf_loc; +} + +simdjson_inline error_code json_iterator::report_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error != SUCCESS && _error != UNINITIALIZED && _error != INCORRECT_TYPE && _error != NO_SUCH_FIELD); + logger::log_error(*this, message); + error = _error; + return error; +} + +simdjson_inline token_position json_iterator::position() const noexcept { + return token.position(); +} + +simdjson_inline simdjson_result json_iterator::unescape(raw_json_string in, bool allow_replacement) noexcept { + return parser->unescape(in, _string_buf_loc, allow_replacement); +} + +simdjson_inline simdjson_result json_iterator::unescape_wobbly(raw_json_string in) noexcept { + return parser->unescape_wobbly(in, _string_buf_loc); +} + +simdjson_inline void json_iterator::reenter_child(token_position position, depth_t child_depth) noexcept { + SIMDJSON_ASSUME(child_depth >= 1 && child_depth < INT32_MAX); + SIMDJSON_ASSUME(_depth == child_depth - 1); +#if SIMDJSON_DEVELOPMENT_CHECKS +#ifndef SIMDJSON_CLANG_VISUAL_STUDIO + SIMDJSON_ASSUME(size_t(child_depth) < parser->max_depth()); + SIMDJSON_ASSUME(position >= parser->start_positions[child_depth]); +#endif +#endif + token.set_position(position); + _depth = child_depth; +} + +simdjson_inline error_code json_iterator::consume_character(char c) noexcept { + if (*peek() == c) { + return_current_and_advance(); + return SUCCESS; + } + return TAPE_ERROR; +} + +#if SIMDJSON_DEVELOPMENT_CHECKS + +simdjson_inline token_position json_iterator::start_position(depth_t depth) const noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + return size_t(depth) < parser->max_depth() ? 
parser->start_positions[depth] : 0; +} + +simdjson_inline void json_iterator::set_start_position(depth_t depth, token_position position) noexcept { + SIMDJSON_ASSUME(size_t(depth) < parser->max_depth()); + if(size_t(depth) < parser->max_depth()) { parser->start_positions[depth] = position; } +} + +#endif + + +simdjson_inline error_code json_iterator::optional_error(error_code _error, const char *message) noexcept { + SIMDJSON_ASSUME(_error == INCORRECT_TYPE || _error == NO_SUCH_FIELD); + logger::log_error(*this, message); + return _error; +} + + +simdjson_warn_unused simdjson_inline bool json_iterator::copy_to_buffer(const uint8_t *json, uint32_t max_len, uint8_t *tmpbuf, size_t N) noexcept { + // This function is not expected to be called in performance-sensitive settings. + // Let us guard against silly cases: + if((N < max_len) || (N == 0)) { return false; } + // Copy to the buffer. + std::memcpy(tmpbuf, json, max_len); + if(N > max_len) { // We pad whatever remains with ' '. + std::memset(tmpbuf + max_len, ' ', N - max_len); + } + return true; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/json_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_type-inl.h for westmere: #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* begin file simdjson/generic/ondemand/json_type-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +inline std::ostream& operator<<(std::ostream& out, json_type type) noexcept { + switch (type) { + case json_type::array: out << "array"; break; + case json_type::object: out << "object"; break; + case json_type::number: out << "number"; break; + case json_type::string: out << "string"; break; + case json_type::boolean: out << "boolean"; break; + case json_type::null: out << "null"; break; + default: SIMDJSON_UNREACHABLE(); + } + return out; +} + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson_result &type) noexcept(false) { + return out << type.value(); +} +#endif + + + +simdjson_inline number_type number::get_number_type() const noexcept { + return type; +} + +simdjson_inline bool number::is_uint64() const noexcept { + return get_number_type() == number_type::unsigned_integer; +} + +simdjson_inline uint64_t number::get_uint64() const noexcept { + return payload.unsigned_integer; +} + +simdjson_inline number::operator uint64_t() const noexcept { + return get_uint64(); +} + + +simdjson_inline bool 
number::is_int64() const noexcept { + return get_number_type() == number_type::signed_integer; +} + +simdjson_inline int64_t number::get_int64() const noexcept { + return payload.signed_integer; +} + +simdjson_inline number::operator int64_t() const noexcept { + return get_int64(); +} + +simdjson_inline bool number::is_double() const noexcept { + return get_number_type() == number_type::floating_point_number; +} + +simdjson_inline double number::get_double() const noexcept { + return payload.floating_point_number; +} + +simdjson_inline number::operator double() const noexcept { + return get_double(); +} + +simdjson_inline double number::as_double() const noexcept { + if(is_double()) { + return payload.floating_point_number; + } + if(is_int64()) { + return double(payload.signed_integer); + } + return double(payload.unsigned_integer); +} + +simdjson_inline void number::append_s64(int64_t value) noexcept { + payload.signed_integer = value; + type = number_type::signed_integer; +} + +simdjson_inline void number::append_u64(uint64_t value) noexcept { + payload.unsigned_integer = value; + type = number_type::unsigned_integer; +} + +simdjson_inline void number::append_double(double value) noexcept { + payload.floating_point_number = value; + type = number_type::floating_point_number; +} + +simdjson_inline void number::skip_double() noexcept { + type = number_type::floating_point_number; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::json_type &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_JSON_TYPE_INL_H +/* end file simdjson/generic/ondemand/json_type-inl.h for westmere */ +/* including simdjson/generic/ondemand/logger-inl.h for westmere: #include "simdjson/generic/ondemand/logger-inl.h" */ +/* begin file simdjson/generic/ondemand/logger-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/logger.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#include +#include + +namespace simdjson { +namespace westmere { +namespace ondemand { +namespace logger { + +static constexpr const char * DASHES = "----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"; +static constexpr const int LOG_EVENT_LEN = 20; +static constexpr const int LOG_BUFFER_LEN = 30; +static constexpr const int LOG_SMALL_BUFFER_LEN = 10; +static int log_depth = 0; // Not threadsafe. Log only. 
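+// Illustrative sketch (editor-added, not part of the upstream simdjson sources,
+// and excluded from compilation): the tagged-union pattern used by
+// ondemand::number above -- one type tag plus a payload union, with as_double()
+// converting whichever member is currently active.
+#if 0
+#include <cstdint>
+
+struct tagged_number {
+  enum class kind { signed_integer, unsigned_integer, floating_point } tag;
+  union {
+    std::int64_t  s;
+    std::uint64_t u;
+    double        d;
+  };
+  double as_double() const {
+    switch (tag) {
+      case kind::floating_point:   return d;
+      case kind::signed_integer:   return double(s);
+      case kind::unsigned_integer: return double(u);
+    }
+    return 0.0; // unreachable for a well-formed tag
+  }
+};
+#endif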
+ +// Helper to turn unprintable or newline characters into spaces +static inline char printable_char(char c) { + if (c >= 0x20) { + return c; + } else { + return ' '; + } +} + +template +static inline std::string string_format(const std::string& format, const Args&... args) +{ + SIMDJSON_PUSH_DISABLE_ALL_WARNINGS + int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + auto size = static_cast(size_s); + if (size <= 0) return std::string(); + std::unique_ptr buf(new char[size]); + std::snprintf(buf.get(), size, format.c_str(), args...); + SIMDJSON_POP_DISABLE_WARNINGS + return std::string(buf.get(), buf.get() + size - 1); +} + +static inline log_level get_log_level_from_env() +{ + SIMDJSON_PUSH_DISABLE_WARNINGS + SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe + char *lvl = getenv("SIMDJSON_LOG_LEVEL"); + SIMDJSON_POP_DISABLE_WARNINGS + if (lvl && simdjson_strcasecmp(lvl, "ERROR") == 0) { return log_level::error; } + return log_level::info; +} + +static inline log_level log_threshold() +{ + static log_level threshold = get_log_level_from_env(); + return threshold; +} + +static inline bool should_log(log_level level) +{ + return level >= log_threshold(); +} + +inline void log_event(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "", type, detail, log_level::info); +} +inline void log_value(const json_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_line(iter, "", type, detail, delta, depth_delta, log_level::info); +} + +inline void log_start_value(const json_iterator &iter, token_position index, depth_t depth, const char *type, std::string_view detail) noexcept { + log_line(iter, index, depth, "+", type, detail, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} +inline void log_start_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_line(iter, "+", type, "", delta, depth_delta, log_level::info); + if (LOG_ENABLED) { log_depth++; } +} + +inline void log_end_value(const json_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + if (LOG_ENABLED) { log_depth--; } + log_line(iter, "-", type, "", delta, depth_delta, log_level::info); +} + +inline void log_error(const json_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_line(iter, "ERROR: ", error, detail, delta, depth_delta, log_level::error); +} +inline void log_error(const json_iterator &iter, token_position index, depth_t depth, const char *error, const char *detail) noexcept { + log_line(iter, index, depth, "ERROR: ", error, detail, log_level::error); +} + +inline void log_event(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_event(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_value(const value_iterator &iter, const char *type, std::string_view detail, int delta, int depth_delta) noexcept { + log_value(iter.json_iter(), type, detail, delta, depth_delta); +} + +inline void log_start_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + 
log_start_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_end_value(const value_iterator &iter, const char *type, int delta, int depth_delta) noexcept { + log_end_value(iter.json_iter(), type, delta, depth_delta); +} + +inline void log_error(const value_iterator &iter, const char *error, const char *detail, int delta, int depth_delta) noexcept { + log_error(iter.json_iter(), error, detail, delta, depth_delta); +} + +inline void log_headers() noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(log_level::info))) { + // Technically a static variable is not thread-safe, but if you are using threads and logging... well... + static bool displayed_hint{false}; + log_depth = 0; + printf("\n"); + if (!displayed_hint) { + // We only print this helpful header once. + printf("# Logging provides the depth and position of the iterator user-visible steps:\n"); + printf("# +array says 'this is where we were when we discovered the start array'\n"); + printf( + "# -array says 'this is where we were when we ended the array'\n"); + printf("# skip says 'this is a structural or value I am skipping'\n"); + printf("# +/-skip says 'this is a start/end array or object I am skipping'\n"); + printf("#\n"); + printf("# The indentation of the terms (array, string,...) indicates the depth,\n"); + printf("# in addition to the depth being displayed.\n"); + printf("#\n"); + printf("# Every token in the document has a single depth determined by the tokens before it,\n"); + printf("# and is not affected by what the token actually is.\n"); + printf("#\n"); + printf("# Not all structural elements are presented as tokens in the logs.\n"); + printf("#\n"); + printf("# We never give control to the user within an empty array or an empty object.\n"); + printf("#\n"); + printf("# Inside an array, having a depth greater than the array's depth means that\n"); + printf("# we are pointing inside a value.\n"); + printf("# Having a depth equal to the array means that we are pointing right before a value.\n"); + printf("# Having a depth smaller than the array means that we have moved beyond the array.\n"); + displayed_hint = true; + } + printf("\n"); + printf("| %-*s ", LOG_EVENT_LEN, "Event"); + printf("| %-*s ", LOG_BUFFER_LEN, "Buffer"); + printf("| %-*s ", LOG_SMALL_BUFFER_LEN, "Next"); + // printf("| %-*s ", 5, "Next#"); + printf("| %-*s ", 5, "Depth"); + printf("| Detail "); + printf("|\n"); + + printf("|%.*s", LOG_EVENT_LEN + 2, DASHES); + printf("|%.*s", LOG_BUFFER_LEN + 2, DASHES); + printf("|%.*s", LOG_SMALL_BUFFER_LEN + 2, DASHES); + // printf("|%.*s", 5+2, DASHES); + printf("|%.*s", 5 + 2, DASHES); + printf("|--------"); + printf("|\n"); + fflush(stdout); + } + } +} + +template +inline void log_line(const json_iterator &iter, const char *title_prefix, const char *title, std::string_view detail, int delta, int depth_delta, log_level level, Args&&... args) noexcept { + log_line(iter, iter.position()+delta, depth_t(iter.depth()+depth_delta), title_prefix, title, detail, level, std::forward(args)...); +} + +template +inline void log_line(const json_iterator &iter, token_position index, depth_t depth, const char *title_prefix, const char *title, std::string_view detail, log_level level, Args&&... 
args) noexcept { + if (LOG_ENABLED) { + if (simdjson_unlikely(should_log(level))) { + const int indent = depth * 2; + const auto buf = iter.token.buf; + auto msg = string_format(title, std::forward(args)...); + printf("| %*s%s%-*s ", indent, "", title_prefix, + LOG_EVENT_LEN - indent - int(strlen(title_prefix)), msg.c_str()); + { + // Print the current structural. + printf("| "); + // Before we begin, the index might point right before the document. + // This could be unsafe, see https://github.com/simdjson/simdjson/discussions/1938 + if (index < iter._root) { + printf("%*s", LOG_BUFFER_LEN, ""); + } else { + auto current_structural = &buf[*index]; + for (int i = 0; i < LOG_BUFFER_LEN; i++) { + printf("%c", printable_char(current_structural[i])); + } + } + printf(" "); + } + { + // Print the next structural. + printf("| "); + auto next_structural = &buf[*(index + 1)]; + for (int i = 0; i < LOG_SMALL_BUFFER_LEN; i++) { + printf("%c", printable_char(next_structural[i])); + } + printf(" "); + } + // printf("| %5u ", *(index+1)); + printf("| %5i ", depth); + printf("| %6.*s ", int(detail.size()), detail.data()); + printf("|\n"); + fflush(stdout); + } + } +} + +} // namespace logger +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_LOGGER_INL_H +/* end file simdjson/generic/ondemand/logger-inl.h for westmere */ +/* including simdjson/generic/ondemand/object-inl.h for westmere: #include "simdjson/generic/ondemand/object-inl.h" */ +/* begin file simdjson/generic/ondemand/object-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field_unordered(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_unordered_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) & noexcept { + return 
find_field_unordered(key); +} +simdjson_inline simdjson_result object::operator[](const std::string_view key) && noexcept { + return std::forward(*this).find_field_unordered(key); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) & noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} +simdjson_inline simdjson_result object::find_field(const std::string_view key) && noexcept { + bool has_value; + SIMDJSON_TRY( iter.find_field_raw(key).get(has_value) ); + if (!has_value) { + logger::log_line(iter.json_iter(), "ERROR: ", "Cannot find key %.*s", "", -1, 0, logger::log_level::error, static_cast(key.size()), key.data()); + return NO_SUCH_FIELD; + } + return value(iter.child()); +} + +simdjson_inline simdjson_result object::start(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_object().error() ); + return object(iter); +} +simdjson_inline simdjson_result object::start_root(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.start_root_object().error() ); + return object(iter); +} +simdjson_inline error_code object::consume() noexcept { + if(iter.is_at_key()) { + /** + * whenever you are pointing at a key, calling skip_child() is + * unsafe because you will hit a string and you will assume that + * it is string value, and this mistake will lead you to make bad + * depth computation. + */ + /** + * We want to 'consume' the key. We could really + * just do _json_iter->return_current_and_advance(); at this + * point, but, for clarity, we will use the high-level API to + * eat the key. We assume that the compiler optimizes away + * most of the work. + */ + simdjson_unused raw_json_string actual_key; + auto error = iter.field_key().get(actual_key); + if (error) { iter.abandon(); return error; }; + // Let us move to the value while we are at it. 
+ if ((error = iter.field_value())) { iter.abandon(); return error; } + } + auto error_skip = iter.json_iter().skip_child(iter.depth()-1); + if(error_skip) { iter.abandon(); } + return error_skip; +} + +simdjson_inline simdjson_result object::raw_json() noexcept { + const uint8_t * starting_point{iter.peek_start()}; + auto error = consume(); + if(error) { return error; } + const uint8_t * final_point{iter._json_iter->peek()}; + return std::string_view(reinterpret_cast(starting_point), size_t(final_point - starting_point)); +} + +simdjson_inline simdjson_result object::started(value_iterator &iter) noexcept { + SIMDJSON_TRY( iter.started_object().error() ); + return object(iter); +} + +simdjson_inline object object::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline object::object(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} + +simdjson_inline simdjson_result object::begin() noexcept { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!iter.is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + return object_iterator(iter); +} +simdjson_inline simdjson_result object::end() noexcept { + return object_iterator(iter); +} + +inline simdjson_result object::at_pointer(std::string_view json_pointer) noexcept { + if (json_pointer[0] != '/') { return INVALID_JSON_POINTER; } + json_pointer = json_pointer.substr(1); + size_t slash = json_pointer.find('/'); + std::string_view key = json_pointer.substr(0, slash); + // Grab the child with the given key + simdjson_result child; + + // If there is an escape character in the key, unescape it and then get the child. + size_t escape = key.find('~'); + if (escape != std::string_view::npos) { + // Unescape the key + std::string unescaped(key); + do { + switch (unescaped[escape+1]) { + case '0': + unescaped.replace(escape, 2, "~"); + break; + case '1': + unescaped.replace(escape, 2, "/"); + break; + default: + return INVALID_JSON_POINTER; // "Unexpected ~ escape character in JSON pointer"); + } + escape = unescaped.find('~', escape+1); + } while (escape != std::string::npos); + child = find_field(unescaped); // Take note find_field does not unescape keys when matching + } else { + child = find_field(key); + } + if(child.error()) { + return child; // we do not continue if there was an error + } + // If there is a /, we have to recurse and look up more of the path + if (slash != std::string_view::npos) { + child = child.at_pointer(json_pointer.substr(slash)); + } + return child; +} + +simdjson_inline simdjson_result object::count_fields() & noexcept { + size_t count{0}; + // Important: we do not consume any of the values. + for(simdjson_unused auto v : *this) { count++; } + // The above loop will always succeed, but we want to report errors. + if(iter.error()) { return iter.error(); } + // We need to move back at the start because we expect users to iterate through + // the object after counting the number of elements. 
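+// Illustrative sketch (editor-added, not part of the upstream simdjson sources,
+// and excluded from compilation): the RFC 6901 key unescaping performed by
+// object::at_pointer() above -- "~1" decodes to "/" and "~0" decodes to "~",
+// so the pointer "/a~1b" addresses the key "a/b" of the root object.
+#if 0
+#include <cstddef>
+#include <string>
+
+inline bool unescape_json_pointer_token(std::string &token) {
+  for (std::size_t i = token.find('~'); i != std::string::npos; i = token.find('~', i + 1)) {
+    if (i + 1 >= token.size())    { return false; }          // dangling '~'
+    if (token[i + 1] == '0')      { token.replace(i, 2, "~"); }
+    else if (token[i + 1] == '1') { token.replace(i, 2, "/"); }
+    else                          { return false; }          // invalid escape
+  }
+  return true;
+}
+#endif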
+ iter.reset_object(); + return count; +} + +simdjson_inline simdjson_result object::is_empty() & noexcept { + bool is_not_empty; + auto error = iter.reset_object().get(is_not_empty); + if(error) { return error; } + return !is_not_empty; +} + +simdjson_inline simdjson_result object::reset() & noexcept { + return iter.reset_object(); +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::object &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::begin() noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() noexcept { + if (error()) { return error(); } + return first.end(); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) & noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first)[key]; +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) & noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) && noexcept { + if (error()) { return error(); } + return std::forward(first).find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +inline simdjson_result simdjson_result::reset() noexcept { + if (error()) { return error(); } + return first.reset(); +} + +inline simdjson_result simdjson_result::is_empty() noexcept { + if (error()) { return error(); } + return first.is_empty(); +} + +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} + +simdjson_inline simdjson_result simdjson_result::raw_json() noexcept { + if (error()) { return error(); } + return first.raw_json(); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_INL_H +/* end file simdjson/generic/ondemand/object-inl.h for westmere */ +/* including simdjson/generic/ondemand/object_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/object_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object_iterator.h" 
*/ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/field-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +// +// object_iterator +// + +simdjson_inline object_iterator::object_iterator(const value_iterator &_iter) noexcept + : iter{_iter} +{} + +simdjson_inline simdjson_result object_iterator::operator*() noexcept { + error_code error = iter.error(); + if (error) { iter.abandon(); return error; } + auto result = field::start(iter); + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (result.error()) { iter.abandon(); } + return result; +} +simdjson_inline bool object_iterator::operator==(const object_iterator &other) const noexcept { + return !(*this != other); +} +simdjson_inline bool object_iterator::operator!=(const object_iterator &) const noexcept { + return iter.is_open(); +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline object_iterator &object_iterator::operator++() noexcept { + // TODO this is a safety rail ... users should exit loops as soon as they receive an error. + // Nonetheless, let's see if performance is OK with this if statement--the compiler may give it to us for free. + if (!iter.is_open()) { return *this; } // Iterator will be released if there is an error + + simdjson_unused error_code error; + if ((error = iter.skip_child() )) { return *this; } + + simdjson_unused bool has_value; + if ((error = iter.has_next_field().get(has_value) )) { return *this; }; + return *this; +} +SIMDJSON_POP_DISABLE_WARNINGS + +// +// ### Live States +// +// While iterating or looking up values, depth >= iter.depth. at_start may vary. Error is +// always SUCCESS: +// +// - Start: This is the state when the object is first found and the iterator is just past the {. +// In this state, at_start == true. +// - Next: After we hand a scalar value to the user, or an array/object which they then fully +// iterate over, the iterator is at the , or } before the next value. In this state, +// depth == iter.depth, at_start == false, and error == SUCCESS. +// - Unfinished Business: When we hand an array/object to the user which they do not fully +// iterate over, we need to finish that iteration by skipping child values until we reach the +// Next state. In this state, depth > iter.depth, at_start == false, and error == SUCCESS. +// +// ## Error States +// +// In error states, we will yield exactly one more value before stopping. iter.depth == depth +// and at_start is always false. We decrement after yielding the error, moving to the Finished +// state. +// +// - Chained Error: When the object iterator is part of an error chain--for example, in +// `for (auto tweet : doc["tweets"])`, where the tweet field may be missing or not be an +// object--we yield that error in the loop, exactly once. In this state, error != SUCCESS and +// iter.depth == depth, and at_start == false. We decrement depth when we yield the error. +// - Missing Comma Error: When the iterator ++ method discovers there is no comma between fields, +// we flag that as an error and treat it exactly the same as a Chained Error. 
In this state, +// error == TAPE_ERROR, iter.depth == depth, and at_start == false. +// +// Errors that occur while reading a field to give to the user (such as when the key is not a +// string or the field is missing a colon) are yielded immediately. Depth is then decremented, +// moving to the Finished state without transitioning through an Error state at all. +// +// ## Terminal State +// +// The terminal state has iter.depth < depth. at_start is always false. +// +// - Finished: When we have reached a }, we are finished. We signal this by decrementing depth. +// In this state, iter.depth < depth, at_start == false, and error == SUCCESS. +// + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::object_iterator &&value +) noexcept + : implementation_simdjson_result_base(std::forward(value)) +{ + first.iter.assert_is_valid(); +} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base({}, error) +{ +} + +simdjson_inline simdjson_result simdjson_result::operator*() noexcept { + if (error()) { return error(); } + return *first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator==(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return !error(); } + return first == other.first; +} +// If we're iterating and there is an error, return the error once. +simdjson_inline bool simdjson_result::operator!=(const simdjson_result &other) const noexcept { + if (!first.iter.is_valid()) { return error(); } + return first != other.first; +} +// Checks for ']' and ',' +simdjson_inline simdjson_result &simdjson_result::operator++() noexcept { + // Clear the error if there is one, so we don't yield it twice + if (error()) { second = SUCCESS; return *this; } + ++first; + return *this; +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_OBJECT_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/object_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/parser-inl.h for westmere: #include "simdjson/generic/ondemand/parser-inl.h" */ +/* begin file simdjson/generic/ondemand/parser-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/padded_string_view.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/internal/dom_parser_implementation.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document_stream.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/parser.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline 
parser::parser(size_t max_capacity) noexcept + : _max_capacity{max_capacity} { +} + +simdjson_warn_unused simdjson_inline error_code parser::allocate(size_t new_capacity, size_t new_max_depth) noexcept { + if (new_capacity > max_capacity()) { return CAPACITY; } + if (string_buf && new_capacity == capacity() && new_max_depth == max_depth()) { return SUCCESS; } + + // string_capacity copied from document::allocate + _capacity = 0; + size_t string_capacity = SIMDJSON_ROUNDUP_N(5 * new_capacity / 3 + SIMDJSON_PADDING, 64); + string_buf.reset(new (std::nothrow) uint8_t[string_capacity]); +#if SIMDJSON_DEVELOPMENT_CHECKS + start_positions.reset(new (std::nothrow) token_position[new_max_depth]); +#endif + if (implementation) { + SIMDJSON_TRY( implementation->set_capacity(new_capacity) ); + SIMDJSON_TRY( implementation->set_max_depth(new_max_depth) ); + } else { + SIMDJSON_TRY( simdjson::get_active_implementation()->create_dom_parser_implementation(new_capacity, new_max_depth, implementation) ); + } + _capacity = new_capacity; + _max_depth = new_max_depth; + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length() || !string_buf) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. + SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return document::start({ reinterpret_cast(json.data()), this }); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const char *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const uint8_t *json, size_t len, size_t allocated) & noexcept { + return iterate(padded_string_view(json, len, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string_view json, size_t allocated) & noexcept { + return iterate(padded_string_view(json, allocated)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { + return iterate(padded_string_view(json)); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + padded_string_view json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const simdjson_result &result) & noexcept { + // We don't presently have a way to temporarily get a const T& from a simdjson_result without throwing an exception + SIMDJSON_TRY( result.error() ); + const padded_string &json = result.value_unsafe(); + return iterate(json); +} + +simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { + if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + + // Allocate if needed + if (capacity() < json.length()) { + SIMDJSON_TRY( allocate(json.length(), max_depth()) ); + } + + // Run stage 1. 
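+  // Editor note (illustrative, not part of the upstream header): stage 1 is simdjson's SIMD pass.
+  // It validates UTF-8 and records the positions of the structural characters ('{', '}', '[', ']',
+  // ':', ',' and the starts of scalars); the iterator returned below then walks that index lazily.
+  // A minimal caller sketch, assuming the public ondemand API and the _padded literal:
+  //
+  //   simdjson::ondemand::parser parser;
+  //   auto doc = parser.iterate(R"({"k":1})"_padded);
+  //   int64_t k{};
+  //   if (doc["k"].get(k) == simdjson::SUCCESS) { /* use k */ }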
+ SIMDJSON_TRY( implementation->stage1(reinterpret_cast(json.data()), json.length(), stage1_mode::regular) ); + return json_iterator(reinterpret_cast(json.data()), this); +} + +inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } + if(allow_comma_separated && batch_size < len) { batch_size = len; } + return document_stream(*this, buf, len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); +} + +simdjson_inline size_t parser::capacity() const noexcept { + return _capacity; +} +simdjson_inline size_t parser::max_capacity() const noexcept { + return _max_capacity; +} +simdjson_inline size_t parser::max_depth() const noexcept { + return _max_depth; +} + +simdjson_inline void parser::set_max_capacity(size_t max_capacity) noexcept { + if(max_capacity < dom::MINIMAL_DOCUMENT_CAPACITY) { + _max_capacity = max_capacity; + } else { + _max_capacity = dom::MINIMAL_DOCUMENT_CAPACITY; + } +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape(raw_json_string in, uint8_t *&dst, bool allow_replacement) const noexcept { + uint8_t *end = implementation->parse_string(in.buf, dst, allow_replacement); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +simdjson_inline simdjson_warn_unused simdjson_result parser::unescape_wobbly(raw_json_string in, uint8_t *&dst) const noexcept { + uint8_t *end = implementation->parse_wobbly_string(in.buf, dst); + if (!end) { return STRING_ERROR; } + std::string_view result(reinterpret_cast(dst), end-dst); + dst = end; + return result; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::parser &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_PARSER_INL_H +/* end file simdjson/generic/ondemand/parser-inl.h for westmere */ +/* including simdjson/generic/ondemand/raw_json_string-inl.h for westmere: #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* begin file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation 
skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +namespace westmere { +namespace ondemand { + +simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} + +simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } + + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;pos < target.size() && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;pos < target.size();pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + +simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(const char* target) noexcept { + size_t pos{0}; + // if the content has no escape character, just scan through it quickly! + for(;target[pos] && target[pos] != '\\';pos++) {} + // slow path may begin. + bool escaping{false}; + for(;target[pos];pos++) { + if((target[pos] == '"') && !escaping) { + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(size_t length, std::string_view target) const noexcept { + // If we are going to call memcmp, then we must know something about the length of the raw_json_string. + return (length >= target.size()) && (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); +} + +simdjson_inline bool raw_json_string::unsafe_is_equal(std::string_view target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + if(target.size() <= SIMDJSON_PADDING) { + return (raw()[target.size()] == '"') && !memcmp(raw(), target.data(), target.size()); + } + const char * r{raw()}; + size_t pos{0}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(std::string_view target) const noexcept { + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;pos < target.size();pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + + +simdjson_inline bool raw_json_string::unsafe_is_equal(const char * target) const noexcept { + // Assumptions: 'target' does not contain unescaped quote characters, is null terminated and + // the raw content is quote terminated within a valid JSON string. 
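+  // Illustrative note (editor addition): a target such as "name" satisfies these assumptions, but
+  // one containing an unescaped quote (e.g. "na\"me") does not -- the scan below could then run
+  // past the closing quote of the JSON string and read adjacent buffer content. The is_equal()
+  // variant further down tracks escaping and rejects such targets instead.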
+ const char * r{raw()}; + size_t pos{0}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_inline bool raw_json_string::is_equal(const char* target) const noexcept { + // Assumptions: does not contain unescaped quote characters, and + // the raw content is quote terminated within a valid JSON string. + const char * r{raw()}; + size_t pos{0}; + bool escaping{false}; + for(;target[pos];pos++) { + if(r[pos] != target[pos]) { return false; } + // if target is a compile-time constant and it is free from + // quotes, then the next part could get optimized away through + // inlining. + if((target[pos] == '"') && !escaping) { + // We have reached the end of the raw_json_string but + // the target is not done. + return false; + } else if(target[pos] == '\\') { + escaping = !escaping; + } else { + escaping = false; + } + } + if(r[pos] != '"') { return false; } + return true; +} + +simdjson_unused simdjson_inline bool operator==(const raw_json_string &a, std::string_view c) noexcept { + return a.unsafe_is_equal(c); +} + +simdjson_unused simdjson_inline bool operator==(std::string_view c, const raw_json_string &a) noexcept { + return a == c; +} + +simdjson_unused simdjson_inline bool operator!=(const raw_json_string &a, std::string_view c) noexcept { + return !(a == c); +} + +simdjson_unused simdjson_inline bool operator!=(std::string_view c, const raw_json_string &a) noexcept { + return !(a == c); +} + + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape(json_iterator &iter, bool allow_replacement) const noexcept { + return iter.unescape(*this, allow_replacement); +} + +simdjson_inline simdjson_warn_unused simdjson_result raw_json_string::unescape_wobbly(json_iterator &iter) const noexcept { + return iter.unescape_wobbly(*this); +} + +simdjson_unused simdjson_inline std::ostream &operator<<(std::ostream &out, const raw_json_string &str) noexcept { + bool in_escape = false; + const char *s = str.raw(); + while (true) { + switch (*s) { + case '\\': in_escape = !in_escape; break; + case '"': if (in_escape) { in_escape = false; } else { return out; } break; + default: if (in_escape) { in_escape = false; } + } + out << *s; + s++; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::raw_json_string &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +simdjson_inline simdjson_result simdjson_result::raw() const noexcept { + if (error()) { return error(); } + return first.raw(); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { + if (error()) { return error(); } + return first.unescape(iter, allow_replacement); +} +simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept { + if (error()) { return error(); } + return first.unescape_wobbly(iter); +} +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_INL_H +/* end file simdjson/generic/ondemand/raw_json_string-inl.h for westmere */ +/* including simdjson/generic/ondemand/serialization-inl.h for westmere: #include 
"simdjson/generic/ondemand/serialization-inl.h" */ +/* begin file simdjson/generic/ondemand/serialization-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/document-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { + +inline std::string_view trim(const std::string_view str) noexcept { + // We can almost surely do better by rolling our own find_first_not_of function. + size_t first = str.find_first_not_of(" \t\n\r"); + // If we have the empty string (just white space), then no trimming is possible, and + // we return the empty string_view. + if (std::string_view::npos == first) { return std::string_view(); } + size_t last = str.find_last_not_of(" \t\n\r"); + return str.substr(first, (last - first + 1)); +} + + +inline simdjson_result to_json_string(westmere::ondemand::document& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::document_reference& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::value& x) noexcept { + /** + * If we somehow receive a value that has already been consumed, + * then the following code could be in trouble. E.g., we create + * an array as needed, but if an array was already created, then + * it could be bad. 
+ */ + using namespace westmere::ondemand; + westmere::ondemand::json_type t; + auto error = x.type().get(t); + if(error != SUCCESS) { return error; } + switch (t) + { + case json_type::array: + { + westmere::ondemand::array array; + error = x.get_array().get(array); + if(error) { return error; } + return to_json_string(array); + } + case json_type::object: + { + westmere::ondemand::object object; + error = x.get_object().get(object); + if(error) { return error; } + return to_json_string(object); + } + default: + return trim(x.raw_json_token()); + } +} + +inline simdjson_result to_json_string(westmere::ondemand::object& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(westmere::ondemand::array& x) noexcept { + std::string_view v; + auto error = x.raw_json().get(v); + if(error) {return error; } + return trim(v); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} + +inline simdjson_result to_json_string(simdjson_result x) { + if (x.error()) { return x.error(); } + return to_json_string(x.value_unsafe()); +} +} // namespace simdjson + +namespace simdjson { namespace westmere { namespace ondemand { + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::value x) { + std::string_view v; + auto error = simdjson::to_json_string(x).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::array value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) 
{ + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document_reference& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result&& x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::document& value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif + +#if SIMDJSON_EXCEPTIONS +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + throw simdjson::simdjson_error(error); + } +} +inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { + if (x.error()) { throw simdjson::simdjson_error(x.error()); } + return (out << x.value()); +} +#else +inline std::ostream& operator<<(std::ostream& out, simdjson::westmere::ondemand::object value) { + std::string_view v; + auto error = simdjson::to_json_string(value).get(v); + if(error == simdjson::SUCCESS) { + return (out << v); + } else { + return (out << error); + } +} +#endif +}}} // namespace simdjson::westmere::ondemand + +#endif // SIMDJSON_GENERIC_ONDEMAND_SERIALIZATION_INL_H +/* end file simdjson/generic/ondemand/serialization-inl.h for westmere */ +/* including simdjson/generic/ondemand/token_iterator-inl.h for westmere: #include "simdjson/generic/ondemand/token_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/token_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base-inl.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline token_iterator::token_iterator( + const uint8_t *_buf, + token_position position +) noexcept : buf{_buf}, _position{position} +{ +} + +simdjson_inline uint32_t token_iterator::current_offset() const noexcept { + return *(_position); +} + + +simdjson_inline const uint8_t *token_iterator::return_current_and_advance() noexcept { + return &buf[*(_position++)]; +} + +simdjson_inline const uint8_t *token_iterator::peek(token_position position) const noexcept { + return &buf[*position]; +} +simdjson_inline uint32_t token_iterator::peek_index(token_position position) const noexcept { + return *position; +} +simdjson_inline uint32_t 
token_iterator::peek_length(token_position position) const noexcept { + return *(position+1) - *position; +} + +simdjson_inline const uint8_t *token_iterator::peek(int32_t delta) const noexcept { + return &buf[*(_position+delta)]; +} +simdjson_inline uint32_t token_iterator::peek_index(int32_t delta) const noexcept { + return *(_position+delta); +} +simdjson_inline uint32_t token_iterator::peek_length(int32_t delta) const noexcept { + return *(_position+delta+1) - *(_position+delta); +} + +simdjson_inline token_position token_iterator::position() const noexcept { + return _position; +} +simdjson_inline void token_iterator::set_position(token_position target_position) noexcept { + _position = target_position; +} + +simdjson_inline bool token_iterator::operator==(const token_iterator &other) const noexcept { + return _position == other._position; +} +simdjson_inline bool token_iterator::operator!=(const token_iterator &other) const noexcept { + return _position != other._position; +} +simdjson_inline bool token_iterator::operator>(const token_iterator &other) const noexcept { + return _position > other._position; +} +simdjson_inline bool token_iterator::operator>=(const token_iterator &other) const noexcept { + return _position >= other._position; +} +simdjson_inline bool token_iterator::operator<(const token_iterator &other) const noexcept { + return _position < other._position; +} +simdjson_inline bool token_iterator::operator<=(const token_iterator &other) const noexcept { + return _position <= other._position; +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::token_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_TOKEN_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/token_iterator-inl.h for westmere */ +/* including simdjson/generic/ondemand/value-inl.h for westmere: #include "simdjson/generic/ondemand/value-inl.h" */ +/* begin file simdjson/generic/ondemand/value-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/array_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline value::value(const value_iterator &_iter) noexcept + : iter{_iter} +{ +} +simdjson_inline value value::start(const value_iterator &iter) 
noexcept { + return iter; +} +simdjson_inline value value::resume(const value_iterator &iter) noexcept { + return iter; +} + +simdjson_inline simdjson_result value::get_array() noexcept { + return array::start(iter); +} +simdjson_inline simdjson_result value::get_object() noexcept { + return object::start(iter); +} +simdjson_inline simdjson_result value::start_or_resume_object() noexcept { + if (iter.at_start()) { + return get_object(); + } else { + return object::resume(iter); + } +} + +simdjson_inline simdjson_result value::get_raw_json_string() noexcept { + return iter.get_raw_json_string(); +} +simdjson_inline simdjson_result value::get_string(bool allow_replacement) noexcept { + return iter.get_string(allow_replacement); +} +simdjson_inline simdjson_result value::get_wobbly_string() noexcept { + return iter.get_wobbly_string(); +} +simdjson_inline simdjson_result value::get_double() noexcept { + return iter.get_double(); +} +simdjson_inline simdjson_result value::get_double_in_string() noexcept { + return iter.get_double_in_string(); +} +simdjson_inline simdjson_result value::get_uint64() noexcept { + return iter.get_uint64(); +} +simdjson_inline simdjson_result value::get_uint64_in_string() noexcept { + return iter.get_uint64_in_string(); +} +simdjson_inline simdjson_result value::get_int64() noexcept { + return iter.get_int64(); +} +simdjson_inline simdjson_result value::get_int64_in_string() noexcept { + return iter.get_int64_in_string(); +} +simdjson_inline simdjson_result value::get_bool() noexcept { + return iter.get_bool(); +} +simdjson_inline simdjson_result value::is_null() noexcept { + return iter.is_null(); +} +template<> simdjson_inline simdjson_result value::get() noexcept { return get_array(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_object(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_raw_json_string(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_string(false); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_number(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_double(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_uint64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_int64(); } +template<> simdjson_inline simdjson_result value::get() noexcept { return get_bool(); } + +template simdjson_inline error_code value::get(T &out) noexcept { + return get().get(out); +} + +#if SIMDJSON_EXCEPTIONS +simdjson_inline value::operator array() noexcept(false) { + return get_array(); +} +simdjson_inline value::operator object() noexcept(false) { + return get_object(); +} +simdjson_inline value::operator uint64_t() noexcept(false) { + return get_uint64(); +} +simdjson_inline value::operator int64_t() noexcept(false) { + return get_int64(); +} +simdjson_inline value::operator double() noexcept(false) { + return get_double(); +} +simdjson_inline value::operator std::string_view() noexcept(false) { + return get_string(false); +} +simdjson_inline value::operator raw_json_string() noexcept(false) { + return get_raw_json_string(); +} +simdjson_inline value::operator bool() noexcept(false) { + return get_bool(); +} +#endif + +simdjson_inline simdjson_result value::begin() & noexcept { + return get_array().begin(); +} +simdjson_inline simdjson_result value::end() & noexcept { + return {}; +} +simdjson_inline simdjson_result 
value::count_elements() & noexcept { + simdjson_result answer; + auto a = get_array(); + answer = a.count_elements(); + // count_elements leaves you pointing inside the array, at the first element. + // We need to move back so that the user can create a new array (which requires that + // we point at '['). + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::count_fields() & noexcept { + simdjson_result answer; + auto a = get_object(); + answer = a.count_fields(); + iter.move_at_start(); + return answer; +} +simdjson_inline simdjson_result value::at(size_t index) noexcept { + auto a = get_array(); + return a.at(index); +} + +simdjson_inline simdjson_result value::find_field(std::string_view key) noexcept { + return start_or_resume_object().find_field(key); +} +simdjson_inline simdjson_result value::find_field(const char *key) noexcept { + return start_or_resume_object().find_field(key); +} + +simdjson_inline simdjson_result value::find_field_unordered(std::string_view key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} +simdjson_inline simdjson_result value::find_field_unordered(const char *key) noexcept { + return start_or_resume_object().find_field_unordered(key); +} + +simdjson_inline simdjson_result value::operator[](std::string_view key) noexcept { + return start_or_resume_object()[key]; +} +simdjson_inline simdjson_result value::operator[](const char *key) noexcept { + return start_or_resume_object()[key]; +} + +simdjson_inline simdjson_result value::type() noexcept { + return iter.type(); +} + +simdjson_inline simdjson_result value::is_scalar() noexcept { + json_type this_type; + auto error = type().get(this_type); + if(error) { return error; } + return ! ((this_type == json_type::array) || (this_type == json_type::object)); +} + +simdjson_inline bool value::is_negative() noexcept { + return iter.is_negative(); +} + +simdjson_inline simdjson_result value::is_integer() noexcept { + return iter.is_integer(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number_type() noexcept { + return iter.get_number_type(); +} +simdjson_warn_unused simdjson_inline simdjson_result value::get_number() noexcept { + return iter.get_number(); +} + +simdjson_inline std::string_view value::raw_json_token() noexcept { + return std::string_view(reinterpret_cast(iter.peek_start()), iter.peek_start_length()); +} + +simdjson_inline simdjson_result value::current_location() noexcept { + return iter.json_iter().current_location(); +} + +simdjson_inline int32_t value::current_depth() const noexcept{ + return iter.json_iter().depth(); +} + +simdjson_inline simdjson_result value::at_pointer(std::string_view json_pointer) noexcept { + json_type t; + SIMDJSON_TRY(type().get(t)); + switch (t) + { + case json_type::array: + return (*this).get_array().at_pointer(json_pointer); + case json_type::object: + return (*this).get_object().at_pointer(json_pointer); + default: + return INVALID_JSON_POINTER; + } +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result( + westmere::ondemand::value &&value +) noexcept : + implementation_simdjson_result_base( + std::forward(value) + ) +{ +} +simdjson_inline simdjson_result::simdjson_result( + error_code error +) noexcept : + implementation_simdjson_result_base(error) +{ +} +simdjson_inline simdjson_result simdjson_result::count_elements() & noexcept { + if (error()) { return error(); } + return 
first.count_elements(); +} +simdjson_inline simdjson_result simdjson_result::count_fields() & noexcept { + if (error()) { return error(); } + return first.count_fields(); +} +simdjson_inline simdjson_result simdjson_result::at(size_t index) noexcept { + if (error()) { return error(); } + return first.at(index); +} +simdjson_inline simdjson_result simdjson_result::begin() & noexcept { + if (error()) { return error(); } + return first.begin(); +} +simdjson_inline simdjson_result simdjson_result::end() & noexcept { + if (error()) { return error(); } + return {}; +} + +simdjson_inline simdjson_result simdjson_result::find_field(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} +simdjson_inline simdjson_result simdjson_result::find_field(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field(key); +} + +simdjson_inline simdjson_result simdjson_result::find_field_unordered(std::string_view key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} +simdjson_inline simdjson_result simdjson_result::find_field_unordered(const char *key) noexcept { + if (error()) { return error(); } + return first.find_field_unordered(key); +} + +simdjson_inline simdjson_result simdjson_result::operator[](std::string_view key) noexcept { + if (error()) { return error(); } + return first[key]; +} +simdjson_inline simdjson_result simdjson_result::operator[](const char *key) noexcept { + if (error()) { return error(); } + return first[key]; +} + +simdjson_inline simdjson_result simdjson_result::get_array() noexcept { + if (error()) { return error(); } + return first.get_array(); +} +simdjson_inline simdjson_result simdjson_result::get_object() noexcept { + if (error()) { return error(); } + return first.get_object(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64() noexcept { + if (error()) { return error(); } + return first.get_uint64(); +} +simdjson_inline simdjson_result simdjson_result::get_uint64_in_string() noexcept { + if (error()) { return error(); } + return first.get_uint64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_int64() noexcept { + if (error()) { return error(); } + return first.get_int64(); +} +simdjson_inline simdjson_result simdjson_result::get_int64_in_string() noexcept { + if (error()) { return error(); } + return first.get_int64_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_double() noexcept { + if (error()) { return error(); } + return first.get_double(); +} +simdjson_inline simdjson_result simdjson_result::get_double_in_string() noexcept { + if (error()) { return error(); } + return first.get_double_in_string(); +} +simdjson_inline simdjson_result simdjson_result::get_string(bool allow_replacement) noexcept { + if (error()) { return error(); } + return first.get_string(allow_replacement); +} +simdjson_inline simdjson_result simdjson_result::get_wobbly_string() noexcept { + if (error()) { return error(); } + return first.get_wobbly_string(); +} +simdjson_inline simdjson_result simdjson_result::get_raw_json_string() noexcept { + if (error()) { return error(); } + return first.get_raw_json_string(); +} +simdjson_inline simdjson_result simdjson_result::get_bool() noexcept { + if (error()) { return error(); } + return first.get_bool(); +} +simdjson_inline simdjson_result simdjson_result::is_null() noexcept { + if (error()) { return error(); } + return first.is_null(); +} + +template simdjson_inline 
simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return first.get(); +} +template simdjson_inline error_code simdjson_result::get(T &out) noexcept { + if (error()) { return error(); } + return first.get(out); +} + +template<> simdjson_inline simdjson_result simdjson_result::get() noexcept { + if (error()) { return error(); } + return std::move(first); +} +template<> simdjson_inline error_code simdjson_result::get(westmere::ondemand::value &out) noexcept { + if (error()) { return error(); } + out = first; + return SUCCESS; +} + +simdjson_inline simdjson_result simdjson_result::type() noexcept { + if (error()) { return error(); } + return first.type(); +} +simdjson_inline simdjson_result simdjson_result::is_scalar() noexcept { + if (error()) { return error(); } + return first.is_scalar(); +} +simdjson_inline simdjson_result simdjson_result::is_negative() noexcept { + if (error()) { return error(); } + return first.is_negative(); +} +simdjson_inline simdjson_result simdjson_result::is_integer() noexcept { + if (error()) { return error(); } + return first.is_integer(); +} +simdjson_inline simdjson_result simdjson_result::get_number_type() noexcept { + if (error()) { return error(); } + return first.get_number_type(); +} +simdjson_inline simdjson_result simdjson_result::get_number() noexcept { + if (error()) { return error(); } + return first.get_number(); +} +#if SIMDJSON_EXCEPTIONS +simdjson_inline simdjson_result::operator westmere::ondemand::array() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::object() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator uint64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator int64_t() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator double() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator std::string_view() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator westmere::ondemand::raw_json_string() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +simdjson_inline simdjson_result::operator bool() noexcept(false) { + if (error()) { throw simdjson_error(error()); } + return first; +} +#endif + +simdjson_inline simdjson_result simdjson_result::raw_json_token() noexcept { + if (error()) { return error(); } + return first.raw_json_token(); +} + +simdjson_inline simdjson_result simdjson_result::current_location() noexcept { + if (error()) { return error(); } + return first.current_location(); +} + +simdjson_inline simdjson_result simdjson_result::current_depth() const noexcept { + if (error()) { return error(); } + return first.current_depth(); +} + +simdjson_inline simdjson_result simdjson_result::at_pointer(std::string_view json_pointer) noexcept { + if (error()) { return error(); } + return first.at_pointer(json_pointer); +} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_INL_H +/* end file simdjson/generic/ondemand/value-inl.h for westmere */ +/* including simdjson/generic/ondemand/value_iterator-inl.h for westmere: #include 
"simdjson/generic/ondemand/value_iterator-inl.h" */ +/* begin file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +#ifndef SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/base.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/atomparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/numberparsing.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_iterator.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_type-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/raw_json_string-inl.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value_iterator.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace ondemand { + +simdjson_inline value_iterator::value_iterator( + json_iterator *json_iter, + depth_t depth, + token_position start_position +) noexcept : _json_iter{json_iter}, _depth{depth}, _start_position{start_position} +{ +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_object() noexcept { + SIMDJSON_TRY( start_container('{', "Not an object", "object") ); + return started_root_object(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_object() noexcept { + assert_at_container_start(); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + if (*_json_iter->peek() == '}') { + logger::log_value(*_json_iter, "empty object"); + _json_iter->return_current_and_advance(); + end_container(); + return false; + } + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_object() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // object: e.g., `{"a":2} foo }`. Users concerned with garbage content should + // call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != '}') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing } at end"); + } + // If the last character is } *and* the first gibberish character is also '}' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. 
+ if ((*_json_iter->peek(_json_iter->end_position()) == '}') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed object. + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_object() noexcept { + auto error = check_root_object(); + if(error) { return error; } + return started_object(); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::end_container() noexcept { +#if SIMDJSON_CHECK_EOF + if (depth() > 1 && at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing parent ] or }"); } + // if (depth() <= 1 && !at_end()) { return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing [ or { at start"); } +#endif // SIMDJSON_CHECK_EOF + _json_iter->ascend_to(depth()-1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_field() noexcept { + assert_at_next(); + + // It's illegal to call this unless there are more tokens: anything that ends in } or ] is + // obligated to verify there are more tokens if they are not the top level. + switch (*_json_iter->return_current_and_advance()) { + case '}': + logger::log_end_value(*_json_iter, "object"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between object fields"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_raw(const std::string_view key) noexcept { + error_code error; + bool has_value; + // + // Initially, the object can be in one of a few different places: + // + // 1. The start of the object, at the first field: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + if (at_first_field()) { + has_value = true; + + // + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. + // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + return false; + + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + if ((error = skip_child() )) { abandon(); return error; } + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + while (has_value) { + // Get the key and colon, stopping at the value. 
+ raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + //if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); // Skip the value entirely + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + + // If the loop ended, we're out of fields to look at. + return false; +} + +SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::find_field_unordered_raw(const std::string_view key) noexcept { + /** + * When find_field_unordered_raw is called, we can either be pointing at the + * first key, pointing outside (at the closing brace) or if a key was matched + * we can be either pointing right afterthe ':' right before the value (that we need skip), + * or we may have consumed the value and we might be at a comma or at the + * final brace (ready for a call to has_next_field()). + */ + error_code error; + bool has_value; + + // First, we scan from that point to the end. + // If we don't find a match, we may loop back around, and scan from the beginning to that point. + token_position search_start = _json_iter->position(); + + // We want to know whether we need to go back to the beginning. + bool at_first = at_first_field(); + /////////////// + // Initially, the object can be in one of a few different places: + // + // 1. At the first key: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2, index 1) + // ``` + // + if (at_first) { + has_value = true; + + // 2. When a previous search did not yield a value or the object is empty: + // + // ``` + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // { } + // ^ (depth 0, index 2) + // ``` + // + } else if (!is_open()) { + +#if SIMDJSON_DEVELOPMENT_CHECKS + // If we're past the end of the object, we're being iterated out of order. 
+ // Note: this isn't perfect detection. It's possible the user is inside some other object; if so, + // this object iterator will blithely scan that object for fields. + if (_json_iter->depth() < depth() - 1) { return OUT_OF_ORDER_ITERATION; } +#endif + SIMDJSON_TRY(reset_object().get(has_value)); + at_first = true; + // 3. When a previous search found a field or an iterator yielded a value: + // + // ``` + // // When a field was not fully consumed (or not even touched at all) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 2) + // // When a field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // When the last field was fully consumed + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // ``` + // + } else { + // If someone queried a key but they not did access the value, then we are left pointing + // at the ':' and we need to move forward through the value... If the value was + // processed then skip_child() does not move the iterator (but may adjust the depth). + if ((error = skip_child() )) { abandon(); return error; } + search_start = _json_iter->position(); + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } +#if SIMDJSON_DEVELOPMENT_CHECKS + if (_json_iter->start_position(_depth) != start_position()) { return OUT_OF_ORDER_ITERATION; } +#endif + } + + // After initial processing, we will be in one of two states: + // + // ``` + // // At the beginning of a field + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 1) + // // At the end of the object + // { "a": [ 1, 2 ], "b": [ 3, 4 ] } + // ^ (depth 0) + // ``` + // + // Next, we find a match starting from the current position. + while (has_value) { + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + if ((error = field_key().get(actual_key) )) { abandon(); return error; }; + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + if ((error = field_value() )) { abandon(); return error; } + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). 
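+    // Editor note (illustrative): if this forward scan reaches '}' without a match, the code
+    // further below rewinds with reset_object() and re-scans the earlier fields, so on
+    // { "a": 1, "b": 2 } querying "b" first and then "a" still finds "a" on the second pass.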
+ SIMDJSON_TRY( skip_child() ); + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + if ((error = has_next_field().get(has_value) )) { abandon(); return error; } + } + // Performance note: it maybe wasteful to rewind to the beginning when there might be + // no other query following. Indeed, it would require reskipping the whole object. + // Instead, you can just stay where you are. If there is a new query, there is always time + // to rewind. + if(at_first) { return false; } + + // If we reach the end without finding a match, search the rest of the fields starting at the + // beginning of the object. + // (We have already run through the object before, so we've already validated its structure. We + // don't check errors in this bit.) + SIMDJSON_TRY(reset_object().get(has_value)); + while (true) { + SIMDJSON_ASSUME(has_value); // we should reach search_start before ever reaching the end of the object + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); // We must be at the start of a field + + // Get the key and colon, stopping at the value. + raw_json_string actual_key; + // size_t max_key_length = _json_iter->peek_length() - 2; // -2 for the two quotes + // Note: _json_iter->peek_length() - 2 might overflow if _json_iter->peek_length() < 2. + // field_key() advances the pointer and checks that '"' is found (corresponding to a key). + // The depth is left unchanged by field_key(). + error = field_key().get(actual_key); SIMDJSON_ASSUME(!error); + // field_value() will advance and check that we find a ':' separating the + // key and the value. It will also increment the depth by one. + error = field_value(); SIMDJSON_ASSUME(!error); + + // If it matches, stop and return + // We could do it this way if we wanted to allow arbitrary + // key content (including escaped quotes). + // if (actual_key.unsafe_is_equal(max_key_length, key)) { + // Instead we do the following which may trigger buffer overruns if the + // user provides an adversarial key (containing a well placed unescaped quote + // character and being longer than the number of bytes remaining in the JSON + // input). + if (actual_key.unsafe_is_equal(key)) { + logger::log_event(*this, "match", key, -2); + // If we return here, then we return while pointing at the ':' that we just checked. + return true; + } + + // No match: skip the value and see if , or } is next + logger::log_event(*this, "no match", key, -2); + // The call to skip_child is meant to skip over the value corresponding to the key. + // After skip_child(), we are right before the next comma (',') or the final brace ('}'). + SIMDJSON_TRY( skip_child() ); + // If we reached the end of the key-value pair we started from, then we know + // that the key is not there so we return false. We are either right before + // the next comma or the final brace. + if(_json_iter->position() == search_start) { return false; } + // The has_next_field() advances the pointer and check that either ',' or '}' is found. + // It returns true if ',' is found, false otherwise. If anything other than ',' or '}' is found, + // then we are in error and we abort. + error = has_next_field().get(has_value); SIMDJSON_ASSUME(!error); + // If we make the mistake of exiting here, then we could be left pointing at a key + // in the middle of an object. That's not an allowable state. 
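+    // To summarize: this second, wrap-around loop exits only through a return statement. We
+    // return true on a key match and false once we come back around to search_start; structural
+    // errors are asserted away here because the first pass already validated the object.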
+ } + // If the loop ended, we're out of fields to look at. The program should + // never reach this point. + return false; +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::field_key() noexcept { + assert_at_next(); + + const uint8_t *key = _json_iter->return_current_and_advance(); + if (*(key++) != '"') { return report_error(TAPE_ERROR, "Object key is not a string"); } + return raw_json_string(key); +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::field_value() noexcept { + assert_at_next(); + + if (*_json_iter->return_current_and_advance() != ':') { return report_error(TAPE_ERROR, "Missing colon in object field"); } + _json_iter->descend_to(depth()+1); + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::start_root_array() noexcept { + SIMDJSON_TRY( start_container('[', "Not an array", "array") ); + return started_root_array(); +} + +inline std::string value_iterator::to_string() const noexcept { + auto answer = std::string("value_iterator [ depth : ") + std::to_string(_depth) + std::string(", "); + if(_json_iter != nullptr) { answer += _json_iter->to_string(); } + answer += std::string(" ]"); + return answer; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_array() noexcept { + assert_at_container_start(); + if (*_json_iter->peek() == ']') { + logger::log_value(*_json_iter, "empty array"); + _json_iter->return_current_and_advance(); + SIMDJSON_TRY( end_container() ); + return false; + } + _json_iter->descend_to(depth()+1); +#if SIMDJSON_DEVELOPMENT_CHECKS + _json_iter->set_start_position(_depth, start_position()); +#endif + return true; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::check_root_array() noexcept { + // When in streaming mode, we cannot expect peek_last() to be the last structural element of the + // current document. It only works in the normal mode where we have indexed a single document. + // Note that adding a check for 'streaming' is not expensive since we only have at most + // one root element. + if ( ! _json_iter->streaming() ) { + // The following lines do not fully protect against garbage content within the + // array: e.g., `[1, 2] foo]`. Users concerned with garbage content should + // also call `at_end()` on the document instance at the end of the processing to + // ensure that the processing has finished at the end. + // + if (*_json_iter->peek_last() != ']') { + _json_iter->abandon(); + return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "missing ] at end"); + } + // If the last character is ] *and* the first gibberish character is also ']' + // then on-demand could accidentally go over. So we need additional checks. + // https://github.com/simdjson/simdjson/issues/1834 + // Checking that the document is balanced requires a full scan which is potentially + // expensive, but it only happens in edge cases where the first padding character is + // a closing bracket. + if ((*_json_iter->peek(_json_iter->end_position()) == ']') && (!_json_iter->balanced())) { + _json_iter->abandon(); + // The exact error would require more work. It will typically be an unclosed array. 
+ return report_error(INCOMPLETE_ARRAY_OR_OBJECT, "the document is unbalanced"); + } + } + return SUCCESS; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::started_root_array() noexcept { + auto error = check_root_array(); + if (error) { return error; } + return started_array(); +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::has_next_element() noexcept { + assert_at_next(); + + logger::log_event(*this, "has_next_element"); + switch (*_json_iter->return_current_and_advance()) { + case ']': + logger::log_end_value(*_json_iter, "array"); + SIMDJSON_TRY( end_container() ); + return false; + case ',': + _json_iter->descend_to(depth()+1); + return true; + default: + return report_error(TAPE_ERROR, "Missing comma between array elements"); + } +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_bool(const uint8_t *json) const noexcept { + auto not_true = atomparsing::str4ncmp(json, "true"); + auto not_false = atomparsing::str4ncmp(json, "fals") | (json[4] ^ 'e'); + bool error = (not_true && not_false) || jsoncharutils::is_not_structural_or_whitespace(json[not_true ? 5 : 4]); + if (error) { return incorrect_type_error("Not a boolean"); } + return simdjson_result(!not_true); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::parse_null(const uint8_t *json) const noexcept { + bool is_null_string = !atomparsing::str4ncmp(json, "null") && jsoncharutils::is_structural_or_whitespace(json[4]); + // if we start with 'n', we must be a null + if(!is_null_string && json[0]=='n') { return incorrect_type_error("Not a null but starts with n"); } + return is_null_string; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_string(bool allow_replacement) noexcept { + return get_raw_json_string().unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_wobbly_string() noexcept { + return get_raw_json_string().unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_raw_json_string() noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64() noexcept { + auto result = numberparsing::parse_unsigned(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_uint64_in_string() noexcept { + auto result = numberparsing::parse_unsigned_in_string(peek_non_root_scalar("uint64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("uint64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64() noexcept { + auto result = numberparsing::parse_integer(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_int64_in_string() noexcept { + auto result = numberparsing::parse_integer_in_string(peek_non_root_scalar("int64")); + if(result.error() == SUCCESS) { advance_non_root_scalar("int64"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double() noexcept { + auto result = 
numberparsing::parse_double(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_double_in_string() noexcept { + auto result = numberparsing::parse_double_in_string(peek_non_root_scalar("double")); + if(result.error() == SUCCESS) { advance_non_root_scalar("double"); } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_bool() noexcept { + auto result = parse_bool(peek_non_root_scalar("bool")); + if(result.error() == SUCCESS) { advance_non_root_scalar("bool"); } + return result; +} +simdjson_inline simdjson_result value_iterator::is_null() noexcept { + bool is_null_value; + SIMDJSON_TRY(parse_null(peek_non_root_scalar("null")).get(is_null_value)); + if(is_null_value) { advance_non_root_scalar("null"); } + return is_null_value; +} +simdjson_inline bool value_iterator::is_negative() noexcept { + return numberparsing::is_negative(peek_non_root_scalar("numbersign")); +} +simdjson_inline bool value_iterator::is_root_negative() noexcept { + return numberparsing::is_negative(peek_root_scalar("numbersign")); +} +simdjson_inline simdjson_result value_iterator::is_integer() noexcept { + return numberparsing::is_integer(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number_type() noexcept { + return numberparsing::get_number_type(peek_non_root_scalar("integer")); +} +simdjson_inline simdjson_result value_iterator::get_number() noexcept { + number num; + error_code error = numberparsing::parse_number(peek_non_root_scalar("number"), num); + if(error) { return error; } + return num; +} + +simdjson_inline simdjson_result value_iterator::is_root_integer(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("is_root_integer"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + return false; // if there are more than 20 characters, it cannot be represented as an integer. + } + auto answer = numberparsing::is_integer(tmpbuf); + // If the parsing was a success, we must still check that it is + // a single scalar. Note that we parse first because of cases like '[]' where + // getting TRAILING_CONTENT is wrong. + if(check_trailing && (answer.error() == SUCCESS) && (!_json_iter->is_single_token())) { return TRAILING_CONTENT; } + return answer; +} + +simdjson_inline simdjson_result value_iterator::get_root_number_type(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. 
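+  // copy_to_buffer() returns false when the token does not fit in the destination buffer
+  // (here 1074+8 = 1082 characters), which is how oversized root numbers are rejected below.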
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto answer = numberparsing::get_number_type(tmpbuf); + if (check_trailing && (answer.error() == SUCCESS) && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + return answer; +} +simdjson_inline simdjson_result value_iterator::get_root_number(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("number"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + number num; + error_code error = numberparsing::parse_number(tmpbuf, num); + if(error) { return error; } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("number"); + return num; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_string(bool check_trailing, bool allow_replacement) noexcept { + return get_root_raw_json_string(check_trailing).unescape(json_iter(), allow_replacement); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_wobbly_string(bool check_trailing) noexcept { + return get_root_raw_json_string(check_trailing).unescape_wobbly(json_iter()); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_raw_json_string(bool check_trailing) noexcept { + auto json = peek_scalar("string"); + if (*json != '"') { return incorrect_type_error("Not a string"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_scalar("string"); + return raw_json_string(json+1); +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_uint64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("uint64"); + uint8_t tmpbuf[20+1+1]{}; // <20 digits> is the longest possible unsigned integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. 
+ if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_unsigned_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("uint64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_int64_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("int64"); + uint8_t tmpbuf[20+1+1]; // -<19 digits> is the longest possible integer + tmpbuf[20+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 20+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 20 characters"); + return NUMBER_ERROR; + } + + auto result = numberparsing::parse_integer_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("int64"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. + tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} + +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_double_in_string(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("double"); + // Per https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/, + // 1074 is the maximum number of significant fractional digits. Add 8 more digits for the biggest + // number: -0.e-308. + uint8_t tmpbuf[1074+8+1+1]; // +1 for null termination. 
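+  // The 8 extra characters cover '-', '0', '.', 'e', '-', '3', '0', '8' around the
+  // (up to 1074) fractional digits.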
+ tmpbuf[1074+8+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 1074+8+1)) { + logger::log_error(*_json_iter, start_position(), depth(), "Root number more than 1082 characters"); + return NUMBER_ERROR; + } + auto result = numberparsing::parse_double_in_string(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("double"); + } + return result; +} +simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("bool"); + uint8_t tmpbuf[5+1+1]; // +1 for null termination + tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. + if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } + auto result = parse_bool(tmpbuf); + if(result.error() == SUCCESS) { + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + } + return result; +} +simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { + auto max_len = peek_start_length(); + auto json = peek_root_scalar("null"); + bool result = (max_len >= 4 && !atomparsing::str4ncmp(json, "null") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + if(result) { // we have something that looks like a null. + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("null"); + } + return result; +} + +simdjson_warn_unused simdjson_inline error_code value_iterator::skip_child() noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth >= _depth ); + + return _json_iter->skip_child(depth()); +} + +simdjson_inline value_iterator value_iterator::child() const noexcept { + assert_at_child(); + return { _json_iter, depth()+1, _json_iter->token.position() }; +} + +// GCC 7 warns when the first line of this function is inlined away into oblivion due to the caller +// relating depth and iterator depth, which is a desired effect. It does not happen if is_open is +// marked non-inline. 
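+// The SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING push/pop below silences that warning around is_open().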
+SIMDJSON_PUSH_DISABLE_WARNINGS +SIMDJSON_DISABLE_STRICT_OVERFLOW_WARNING +simdjson_inline bool value_iterator::is_open() const noexcept { + return _json_iter->depth() >= depth(); +} +SIMDJSON_POP_DISABLE_WARNINGS + +simdjson_inline bool value_iterator::at_end() const noexcept { + return _json_iter->at_end(); +} + +simdjson_inline bool value_iterator::at_start() const noexcept { + return _json_iter->token.position() == start_position(); +} + +simdjson_inline bool value_iterator::at_first_field() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + return _json_iter->token.position() == start_position() + 1; +} + +simdjson_inline void value_iterator::abandon() noexcept { + _json_iter->abandon(); +} + +simdjson_warn_unused simdjson_inline depth_t value_iterator::depth() const noexcept { + return _depth; +} +simdjson_warn_unused simdjson_inline error_code value_iterator::error() const noexcept { + return _json_iter->error; +} +simdjson_warn_unused simdjson_inline uint8_t *&value_iterator::string_buf_loc() noexcept { + return _json_iter->string_buf_loc(); +} +simdjson_warn_unused simdjson_inline const json_iterator &value_iterator::json_iter() const noexcept { + return *_json_iter; +} +simdjson_warn_unused simdjson_inline json_iterator &value_iterator::json_iter() noexcept { + return *_json_iter; +} + +simdjson_inline const uint8_t *value_iterator::peek_start() const noexcept { + return _json_iter->peek(start_position()); +} +simdjson_inline uint32_t value_iterator::peek_start_length() const noexcept { + return _json_iter->peek_length(start_position()); +} + +simdjson_inline const uint8_t *value_iterator::peek_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return peek_start(); } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + if (!is_at_start()) { return; } + + // Get the JSON and advance the cursor, decreasing depth to signify that we have retrieved the value. + assert_at_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::start_container(uint8_t start_char, const char *incorrect_type_message, const char *type) noexcept { + logger::log_start_value(*_json_iter, start_position(), depth(), type); + // If we're not at the position anymore, we don't want to advance the cursor. + const uint8_t *json; + if (!is_at_start()) { +#if SIMDJSON_DEVELOPMENT_CHECKS + if (!is_at_iterator_start()) { return OUT_OF_ORDER_ITERATION; } +#endif + json = peek_start(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + } else { + assert_at_start(); + /** + * We should be prudent. Let us peek. If it is not the right type, we + * return an error. Only once we have determined that we have the right + * type are we allowed to advance! 
+ */ + json = _json_iter->peek(); + if (*json != start_char) { return incorrect_type_error(incorrect_type_message); } + _json_iter->return_current_and_advance(); + } + + + return SUCCESS; +} + + +simdjson_inline const uint8_t *value_iterator::peek_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_root(); + return _json_iter->peek(); +} +simdjson_inline const uint8_t *value_iterator::peek_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return peek_start(); } + + assert_at_non_root_start(); + return _json_iter->peek(); +} + +simdjson_inline void value_iterator::advance_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_root(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} +simdjson_inline void value_iterator::advance_non_root_scalar(const char *type) noexcept { + logger::log_value(*_json_iter, start_position(), depth(), type); + if (!is_at_start()) { return; } + + assert_at_non_root_start(); + _json_iter->return_current_and_advance(); + _json_iter->ascend_to(depth()-1); +} + +simdjson_inline error_code value_iterator::incorrect_type_error(const char *message) const noexcept { + logger::log_error(*_json_iter, start_position(), depth(), message); + return INCORRECT_TYPE; +} + +simdjson_inline bool value_iterator::is_at_start() const noexcept { + return position() == start_position(); +} + +simdjson_inline bool value_iterator::is_at_key() const noexcept { + // Keys are at the same depth as the object. + // Note here that we could be safer and check that we are within an object, + // but we do not. + return _depth == _json_iter->_depth && *_json_iter->peek() == '"'; +} + +simdjson_inline bool value_iterator::is_at_iterator_start() const noexcept { + // We can legitimately be either at the first value ([1]), or after the array if it's empty ([]). 
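+  // Concretely: delta == 1 means the cursor sits on the first value, one token past the opening
+  // bracket; delta == 2 means an empty container's closing bracket has also been consumed.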
+ auto delta = position() - start_position(); + return delta == 1 || delta == 2; +} + +inline void value_iterator::assert_at_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_container_start() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position == _start_position + 1 ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_next() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +simdjson_inline void value_iterator::move_at_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position); +} + +simdjson_inline void value_iterator::move_at_container_start() noexcept { + _json_iter->_depth = _depth; + _json_iter->token.set_position(_start_position + 1); +} + +simdjson_inline simdjson_result value_iterator::reset_array() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_array(); +} + +simdjson_inline simdjson_result value_iterator::reset_object() noexcept { + if(error()) { return error(); } + move_at_container_start(); + return started_object(); +} + +inline void value_iterator::assert_at_child() const noexcept { + SIMDJSON_ASSUME( _json_iter->token._position > _start_position ); + SIMDJSON_ASSUME( _json_iter->_depth == _depth + 1 ); + SIMDJSON_ASSUME( _depth > 0 ); +} + +inline void value_iterator::assert_at_root() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth == 1 ); +} + +inline void value_iterator::assert_at_non_root_start() const noexcept { + assert_at_start(); + SIMDJSON_ASSUME( _depth > 1 ); +} + +inline void value_iterator::assert_is_valid() const noexcept { + SIMDJSON_ASSUME( _json_iter != nullptr ); +} + +simdjson_inline bool value_iterator::is_valid() const noexcept { + return _json_iter != nullptr; +} + +simdjson_inline simdjson_result value_iterator::type() const noexcept { + switch (*peek_start()) { + case '{': + return json_type::object; + case '[': + return json_type::array; + case '"': + return json_type::string; + case 'n': + return json_type::null; + case 't': case 'f': + return json_type::boolean; + case '-': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return json_type::number; + default: + return TAPE_ERROR; + } +} + +simdjson_inline token_position value_iterator::start_position() const noexcept { + return _start_position; +} + +simdjson_inline token_position value_iterator::position() const noexcept { + return _json_iter->position(); +} + +simdjson_inline token_position value_iterator::end_position() const noexcept { + return _json_iter->end_position(); +} + +simdjson_inline token_position value_iterator::last_position() const noexcept { + return _json_iter->last_position(); +} + +simdjson_inline error_code value_iterator::report_error(error_code error, const char *message) noexcept { + return _json_iter->report_error(error, message); +} + +} // namespace ondemand +} // namespace westmere +} // namespace simdjson + +namespace simdjson { + +simdjson_inline simdjson_result::simdjson_result(westmere::ondemand::value_iterator &&value) noexcept + : implementation_simdjson_result_base(std::forward(value)) {} +simdjson_inline 
simdjson_result::simdjson_result(error_code error) noexcept + : implementation_simdjson_result_base(error) {} + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H +/* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +/* end file simdjson/generic/ondemand/amalgamated.h for westmere */ +/* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ +/* begin file simdjson/westmere/end.h */ +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #include "simdjson/westmere/base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +#if !SIMDJSON_CAN_ALWAYS_RUN_WESTMERE +SIMDJSON_UNTARGET_REGION +#endif + +/* undefining SIMDJSON_IMPLEMENTATION from "westmere" */ +#undef SIMDJSON_IMPLEMENTATION +/* end file simdjson/westmere/end.h */ + +#endif // SIMDJSON_WESTMERE_IMPLEMENTATION_H +/* end file simdjson/westmere/ondemand.h */ +#else +#error Unknown SIMDJSON_BUILTIN_IMPLEMENTATION +#endif + +/* undefining SIMDJSON_CONDITIONAL_INCLUDE */ +#undef SIMDJSON_CONDITIONAL_INCLUDE + +namespace simdjson { + /** + * @copydoc simdjson::SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand + */ + namespace ondemand = SIMDJSON_BUILTIN_IMPLEMENTATION::ondemand; +} // namespace simdjson + +#endif // SIMDJSON_BUILTIN_ONDEMAND_H +/* end file simdjson/builtin/ondemand.h */ + +namespace simdjson { + /** + * @copydoc simdjson::builtin::ondemand + */ + namespace ondemand = builtin::ondemand; +} // namespace simdjson + +#endif // SIMDJSON_ONDEMAND_H +/* end file simdjson/ondemand.h */ + +#endif // SIMDJSON_H +/* end file simdjson.h */ diff --git a/src/3rd_party/skarupke_maps/bytell_hash_map.hpp b/src/3rd_party/skarupke_maps/bytell_hash_map.hpp new file mode 100644 index 00000000..f06ae1c6 --- /dev/null +++ b/src/3rd_party/skarupke_maps/bytell_hash_map.hpp @@ -0,0 +1,1297 @@ +//This library has been edited by Amalgam to reduce memory use +// by making _max_load_factor a constant +// remove the BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION definition to get +// the original behavior +#define BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + +// Copyright Malte Skarupke 2017. +// Distributed under the Boost Software License, Version 1.0. +// (See http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "flat_hash_map.hpp" +#include +#include + +#ifdef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION +//pack alignment tight +#pragma pack(push, 1) +#endif + +namespace ska +{ + +namespace detailv8 +{ +using ska::detailv3::functor_storage; +using ska::detailv3::KeyOrValueHasher; +using ska::detailv3::KeyOrValueEquality; +using ska::detailv3::AssignIfTrue; +using ska::detailv3::HashPolicySelector; + +template +struct sherwood_v8_constants +{ + static constexpr int8_t magic_for_empty = int8_t(0b11111111); + static constexpr int8_t magic_for_reserved = int8_t(0b11111110); + static constexpr int8_t bits_for_direct_hit = int8_t(0b10000000); + static constexpr int8_t magic_for_direct_hit = int8_t(0b00000000); + static constexpr int8_t magic_for_list_entry = int8_t(0b10000000); + + static constexpr int8_t bits_for_distance = int8_t(0b01111111); + inline static int distance_from_metadata(int8_t metadata) + { + return metadata & bits_for_distance; + } + + static constexpr int num_jump_distances = 126; + // jump distances chosen like this: + // 1. pick the first 16 integers to promote staying in the same block + // 2. 
add the next 66 triangular numbers to get even jumps when + // the hash table is a power of two + // 3. add 44 more triangular numbers at a much steeper growth rate + // to get a sequence that allows large jumps so that a table + // with 10000 sequential numbers doesn't endlessly re-allocate + static constexpr size_t jump_distances[num_jump_distances] + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + + 21, 28, 36, 45, 55, 66, 78, 91, 105, 120, 136, 153, 171, 190, 210, 231, + 253, 276, 300, 325, 351, 378, 406, 435, 465, 496, 528, 561, 595, 630, + 666, 703, 741, 780, 820, 861, 903, 946, 990, 1035, 1081, 1128, 1176, + 1225, 1275, 1326, 1378, 1431, 1485, 1540, 1596, 1653, 1711, 1770, 1830, + 1891, 1953, 2016, 2080, 2145, 2211, 2278, 2346, 2415, 2485, 2556, + + 3741, 8385, 18915, 42486, 95703, 215496, 485605, 1091503, 2456436, + 5529475, 12437578, 27986421, 62972253, 141700195, 318819126, 717314626, + 1614000520, 3631437253, 8170829695, 18384318876, 41364501751, + 93070021080, 209407709220, 471167588430, 1060127437995, 2385287281530, + 5366895564381, 12075513791265, 27169907873235, 61132301007778, + 137547673121001, 309482258302503, 696335090510256, 1566753939653640, + 3525196427195653, 7931691866727775, 17846306747368716, + 40154190394120111, 90346928493040500, 203280588949935750, + 457381324898247375, 1029107980662394500, 2315492957028380766, + 5209859150892887590, + }; +}; +template +constexpr int8_t sherwood_v8_constants::magic_for_empty; +template +constexpr int8_t sherwood_v8_constants::magic_for_reserved; +template +constexpr int8_t sherwood_v8_constants::bits_for_direct_hit; +template +constexpr int8_t sherwood_v8_constants::magic_for_direct_hit; +template +constexpr int8_t sherwood_v8_constants::magic_for_list_entry; + +template +constexpr int8_t sherwood_v8_constants::bits_for_distance; + +template +constexpr int sherwood_v8_constants::num_jump_distances; +template +constexpr size_t sherwood_v8_constants::jump_distances[num_jump_distances]; + +template +struct sherwood_v8_block +{ + sherwood_v8_block() + { + } + ~sherwood_v8_block() + { + } + int8_t control_bytes[BlockSize]; + union + { + T data[BlockSize]; + }; + + static sherwood_v8_block * empty_block() + { + static std::array empty_bytes = [] + { + std::array result; + result.fill(sherwood_v8_constants<>::magic_for_empty); + return result; + }(); + return reinterpret_cast(&empty_bytes); + } + + int first_empty_index() const + { + for (int i = 0; i < BlockSize; ++i) + { + if (control_bytes[i] == sherwood_v8_constants<>::magic_for_empty) + return i; + } + return -1; + } + + void fill_control_bytes(int8_t value) + { + std::fill(std::begin(control_bytes), std::end(control_bytes), value); + } +}; + +template +class sherwood_v8_table : private ByteAlloc, private Hasher, private Equal +{ + using AllocatorTraits = std::allocator_traits; + using BlockType = sherwood_v8_block; + using BlockPointer = BlockType *; + using BytePointer = typename AllocatorTraits::pointer; + struct convertible_to_iterator; + using Constants = sherwood_v8_constants<>; + +public: + +#ifdef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + static constexpr float _max_load_factor = 0.5f; +#endif + + using value_type = T; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using hasher = ArgumentHash; + using key_equal = ArgumentEqual; + using allocator_type = ByteAlloc; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + + 
sherwood_v8_table() + { + } + explicit sherwood_v8_table(size_type bucket_count, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) + : ByteAlloc(alloc), Hasher(hash), Equal(equal) + { + if (bucket_count) + rehash(bucket_count); + } + sherwood_v8_table(size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v8_table(bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + sherwood_v8_table(size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v8_table(bucket_count, hash, ArgumentEqual(), alloc) + { + } + explicit sherwood_v8_table(const ArgumentAlloc & alloc) + : ByteAlloc(alloc) + { + } + template + sherwood_v8_table(It first, It last, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) + : sherwood_v8_table(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + template + sherwood_v8_table(It first, It last, size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v8_table(first, last, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + template + sherwood_v8_table(It first, It last, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v8_table(first, last, bucket_count, hash, ArgumentEqual(), alloc) + { + } + sherwood_v8_table(std::initializer_list il, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) + : sherwood_v8_table(bucket_count, hash, equal, alloc) + { + if (bucket_count == 0) + rehash(il.size()); + insert(il.begin(), il.end()); + } + sherwood_v8_table(std::initializer_list il, size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v8_table(il, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + sherwood_v8_table(std::initializer_list il, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v8_table(il, bucket_count, hash, ArgumentEqual(), alloc) + { + } + sherwood_v8_table(const sherwood_v8_table & other) + : sherwood_v8_table(other, AllocatorTraits::select_on_container_copy_construction(other.get_allocator())) + { + } + sherwood_v8_table(const sherwood_v8_table & other, const ArgumentAlloc & alloc) + : ByteAlloc(alloc), Hasher(other), Equal(other) +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + , _max_load_factor(other._max_load_factor) +#endif + { + rehash_for_other_container(other); + try + { + insert(other.begin(), other.end()); + } + catch(...) 
+ { + clear(); + deallocate_data(entries, num_slots_minus_one); + throw; + } + } + sherwood_v8_table(sherwood_v8_table && other) noexcept + : ByteAlloc(std::move(other)), Hasher(std::move(other)), Equal(std::move(other)) +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + , _max_load_factor(other._max_load_factor) +#endif + { + swap_pointers(other); + } + sherwood_v8_table(sherwood_v8_table && other, const ArgumentAlloc & alloc) noexcept + : ByteAlloc(alloc), Hasher(std::move(other)), Equal(std::move(other)) +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + , _max_load_factor(other._max_load_factor) +#endif + { + swap_pointers(other); + } + sherwood_v8_table & operator=(const sherwood_v8_table & other) + { + if (this == std::addressof(other)) + return *this; + + clear(); + if (AllocatorTraits::propagate_on_container_copy_assignment::value) + { + if (static_cast(*this) != static_cast(other)) + { + reset_to_empty_state(); + } + AssignIfTrue()(*this, other); + } +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = other._max_load_factor; +#endif + static_cast(*this) = other; + static_cast(*this) = other; + rehash_for_other_container(other); + insert(other.begin(), other.end()); + return *this; + } + sherwood_v8_table & operator=(sherwood_v8_table && other) noexcept + { + if (this == std::addressof(other)) + return *this; + else if (AllocatorTraits::propagate_on_container_move_assignment::value) + { + clear(); + reset_to_empty_state(); + AssignIfTrue()(*this, std::move(other)); + swap_pointers(other); + } + else if (static_cast(*this) == static_cast(other)) + { + swap_pointers(other); + } + else + { + clear(); +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = other._max_load_factor; +#endif + + rehash_for_other_container(other); + for (T & elem : other) + emplace(std::move(elem)); + other.clear(); + } + static_cast(*this) = std::move(other); + static_cast(*this) = std::move(other); + return *this; + } + ~sherwood_v8_table() + { + clear(); + deallocate_data(entries, num_slots_minus_one); + } + + const allocator_type & get_allocator() const + { + return static_cast(*this); + } + const ArgumentEqual & key_eq() const + { + return static_cast(*this); + } + const ArgumentHash & hash_function() const + { + return static_cast(*this); + } + + template + struct templated_iterator + { + private: + friend class sherwood_v8_table; + BlockPointer current = BlockPointer(); + size_t index = 0; + + public: + templated_iterator() + { + } + templated_iterator(BlockPointer entries, size_t index) + : current(entries) + , index(index) + { + } + + using iterator_category = std::forward_iterator_tag; + using value_type = ValueType; + using difference_type = ptrdiff_t; + using pointer = ValueType *; + using reference = ValueType &; + + friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs) + { + return lhs.index == rhs.index; + } + friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs) + { + return !(lhs == rhs); + } + + templated_iterator & operator++() + { + do + { + if (index % BlockSize == 0) + --current; + if (index-- == 0) + break; + } + while(current->control_bytes[index % BlockSize] == Constants::magic_for_empty); + return *this; + } + templated_iterator operator++(int) + { + templated_iterator copy(*this); + ++*this; + return copy; + } + + ValueType & operator*() const + { + return current->data[index % BlockSize]; + } + ValueType * operator->() const + { + return current->data + index % BlockSize; + } + + operator 
templated_iterator() const + { + return { current, index }; + } + }; + using iterator = templated_iterator; + using const_iterator = templated_iterator; + + iterator begin() + { + size_t num_slots = num_slots_minus_one ? num_slots_minus_one + 1 : 0; + return ++iterator{ entries + num_slots / BlockSize, num_slots }; + } + const_iterator begin() const + { + size_t num_slots = num_slots_minus_one ? num_slots_minus_one + 1 : 0; + return ++iterator{ entries + num_slots / BlockSize, num_slots }; + } + const_iterator cbegin() const + { + return begin(); + } + iterator end() + { + return { entries - 1, std::numeric_limits::max() }; + } + const_iterator end() const + { + return { entries - 1, std::numeric_limits::max() }; + } + const_iterator cend() const + { + return end(); + } + + inline iterator find(const FindKey & key) + { + size_t index = hash_object(key); + size_t num_slots_minus_one = this->num_slots_minus_one; + BlockPointer entries = this->entries; + index = hash_policy.index_for_hash(index, num_slots_minus_one); + bool first = true; + for (;;) + { + size_t block_index = index / BlockSize; + int index_in_block = index % BlockSize; + BlockPointer block = entries + block_index; + int8_t metadata = block->control_bytes[index_in_block]; + if (first) + { + if ((metadata & Constants::bits_for_direct_hit) != Constants::magic_for_direct_hit) + return end(); + first = false; + } + if (compares_equal(key, block->data[index_in_block])) + return { block, index }; + int8_t to_next_index = metadata & Constants::bits_for_distance; + if (to_next_index == 0) + return end(); + index += Constants::jump_distances[to_next_index]; + index = hash_policy.keep_in_range(index, num_slots_minus_one); + } + } + inline const_iterator find(const FindKey & key) const + { + return const_cast(this)->find(key); + } + size_t count(const FindKey & key) const + { + return find(key) == end() ? 0 : 1; + } + std::pair equal_range(const FindKey & key) + { + iterator found = find(key); + if (found == end()) + return { found, found }; + else + return { found, std::next(found) }; + } + std::pair equal_range(const FindKey & key) const + { + const_iterator found = find(key); + if (found == end()) + return { found, found }; + else + return { found, std::next(found) }; + } + + + template + inline std::pair emplace(Key && key, Args &&... 
args) + { + size_t index = hash_object(key); + size_t num_slots_minus_one = this->num_slots_minus_one; + BlockPointer entries = this->entries; + index = hash_policy.index_for_hash(index, num_slots_minus_one); + bool first = true; + for (;;) + { + size_t block_index = index / BlockSize; + int index_in_block = index % BlockSize; + BlockPointer block = entries + block_index; + int8_t metadata = block->control_bytes[index_in_block]; + if (first) + { + if ((metadata & Constants::bits_for_direct_hit) != Constants::magic_for_direct_hit) + return emplace_direct_hit({ index, block }, std::forward(key), std::forward(args)...); + first = false; + } + if (compares_equal(key, block->data[index_in_block])) + return { { block, index }, false }; + int8_t to_next_index = metadata & Constants::bits_for_distance; + if (to_next_index == 0) + return emplace_new_key({ index, block }, std::forward(key), std::forward(args)...); + index += Constants::jump_distances[to_next_index]; + index = hash_policy.keep_in_range(index, num_slots_minus_one); + } + } + + std::pair insert(const value_type & value) + { + return emplace(value); + } + std::pair insert(value_type && value) + { + return emplace(std::move(value)); + } + template + iterator emplace_hint(const_iterator, Args &&... args) + { + return emplace(std::forward(args)...).first; + } + iterator insert(const_iterator, const value_type & value) + { + return emplace(value).first; + } + iterator insert(const_iterator, value_type && value) + { + return emplace(std::move(value)).first; + } + + template + void insert(It begin, It end) + { + for (; begin != end; ++begin) + { + emplace(*begin); + } + } + void insert(std::initializer_list il) + { + insert(il.begin(), il.end()); + } + + void rehash(size_t num_items) + { + num_items = std::max(num_items, static_cast(std::ceil(num_elements / static_cast(_max_load_factor)))); + if (num_items == 0) + { + reset_to_empty_state(); + return; + } + auto new_prime_index = hash_policy.next_size_over(num_items); + if (num_items == num_slots_minus_one + 1) + return; + size_t num_blocks = num_items / BlockSize; + if (num_items % BlockSize) + ++num_blocks; + size_t memory_requirement = calculate_memory_requirement(num_blocks); + unsigned char * new_memory = &*AllocatorTraits::allocate(*this, memory_requirement); + + BlockPointer new_buckets = reinterpret_cast(new_memory); + + BlockPointer special_end_item = new_buckets + num_blocks; + for (BlockPointer it = new_buckets; it <= special_end_item; ++it) + it->fill_control_bytes(Constants::magic_for_empty); + using std::swap; + swap(entries, new_buckets); + swap(num_slots_minus_one, num_items); + --num_slots_minus_one; + hash_policy.commit(new_prime_index); + num_elements = 0; + if (num_items) + ++num_items; + size_t old_num_blocks = num_items / BlockSize; + if (num_items % BlockSize) + ++old_num_blocks; + for (BlockPointer it = new_buckets, end = new_buckets + old_num_blocks; it != end; ++it) + { + for (int i = 0; i < BlockSize; ++i) + { + int8_t metadata = it->control_bytes[i]; + if (metadata != Constants::magic_for_empty && metadata != Constants::magic_for_reserved) + { + emplace(std::move(it->data[i])); + AllocatorTraits::destroy(*this, it->data + i); + } + } + } + deallocate_data(new_buckets, num_items - 1); + } + + void reserve(size_t num_elements) + { + size_t required_buckets = num_buckets_for_reserve(num_elements); + if (required_buckets > bucket_count()) + rehash(required_buckets); + } + + // the return value is a type that can be converted to an iterator + // the reason for doing 
this is that it's not free to find the + // iterator pointing at the next element. if you care about the + // next iterator, turn the return value into an iterator + convertible_to_iterator erase(const_iterator to_erase) + { + LinkedListIt current = { to_erase.index, to_erase.current }; + if (current.has_next()) + { + LinkedListIt previous = current; + LinkedListIt next = current.next(*this); + while (next.has_next()) + { + previous = next; + next = next.next(*this); + } + AllocatorTraits::destroy(*this, std::addressof(*current)); + AllocatorTraits::construct(*this, std::addressof(*current), std::move(*next)); + AllocatorTraits::destroy(*this, std::addressof(*next)); + next.set_metadata(Constants::magic_for_empty); + previous.clear_next(); + } + else + { + if (!current.is_direct_hit()) + find_parent_block(current).clear_next(); + AllocatorTraits::destroy(*this, std::addressof(*current)); + current.set_metadata(Constants::magic_for_empty); + } + --num_elements; + return { to_erase.current, to_erase.index }; + } + + iterator erase(const_iterator begin_it, const_iterator end_it) + { + if (begin_it == end_it) + return { begin_it.current, begin_it.index }; + if (std::next(begin_it) == end_it) + return erase(begin_it); + if (begin_it == begin() && end_it == end()) + { + clear(); + return { end_it.current, end_it.index }; + } + std::vector> depth_in_chain; + for (const_iterator it = begin_it; it != end_it; ++it) + { + LinkedListIt list_it(it.index, it.current); + if (list_it.is_direct_hit()) + depth_in_chain.emplace_back(0, list_it); + else + { + LinkedListIt root = find_direct_hit(list_it); + int distance = 1; + for (;;) + { + LinkedListIt next = root.next(*this); + if (next == list_it) + break; + ++distance; + root = next; + } + depth_in_chain.emplace_back(distance, list_it); + } + } + std::sort(depth_in_chain.begin(), depth_in_chain.end(), [](const auto & a, const auto & b) { return a.first < b.first; }); + for (auto it = depth_in_chain.rbegin(), end = depth_in_chain.rend(); it != end; ++it) + { + erase(it->second.it()); + } + + if (begin_it.current->control_bytes[begin_it.index % BlockSize] == Constants::magic_for_empty) + return ++iterator{ begin_it.current, begin_it.index }; + else + return { begin_it.current, begin_it.index }; + } + + size_t erase(const FindKey & key) + { + auto found = find(key); + if (found == end()) + return 0; + else + { + erase(found); + return 1; + } + } + + void clear() + { + if (!num_slots_minus_one) + return; + size_t num_slots = num_slots_minus_one + 1; + size_t num_blocks = num_slots / BlockSize; + if (num_slots % BlockSize) + ++num_blocks; + for (BlockPointer it = entries, end = it + num_blocks; it != end; ++it) + { + for (int i = 0; i < BlockSize; ++i) + { + if (it->control_bytes[i] != Constants::magic_for_empty) + { + AllocatorTraits::destroy(*this, std::addressof(it->data[i])); + it->control_bytes[i] = Constants::magic_for_empty; + } + } + } + num_elements = 0; + } + + void shrink_to_fit() + { + rehash_for_other_container(*this); + } + + void swap(sherwood_v8_table & other) + { + using std::swap; + swap_pointers(other); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + if (AllocatorTraits::propagate_on_container_swap::value) + swap(static_cast(*this), static_cast(other)); + } + + size_t size() const + { + return num_elements; + } + size_t max_size() const + { + return (AllocatorTraits::max_size(*this)) / sizeof(T); + } + size_t bucket_count() const + { + return num_slots_minus_one ? 
num_slots_minus_one + 1 : 0; + } + size_type max_bucket_count() const + { + return (AllocatorTraits::max_size(*this)) / sizeof(T); + } + size_t bucket(const FindKey & key) const + { + return hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); + } + float load_factor() const + { + return static_cast(num_elements) / (num_slots_minus_one + 1); + } + void max_load_factor(float value) + { +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = value; +#endif + } + float max_load_factor() const + { + return _max_load_factor; + } + + bool empty() const + { + return num_elements == 0; + } + +private: + BlockPointer entries = BlockType::empty_block(); + size_t num_slots_minus_one = 0; + typename HashPolicySelector::type hash_policy; +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + float _max_load_factor = 0.9375f; +#endif + size_t num_elements = 0; + + size_t num_buckets_for_reserve(size_t num_elements) const + { + return static_cast(std::ceil(num_elements / static_cast(_max_load_factor))); + } + void rehash_for_other_container(const sherwood_v8_table & other) + { + rehash(std::min(num_buckets_for_reserve(other.size()), other.bucket_count())); + } + bool is_full() const + { + if (!num_slots_minus_one) + return true; + else + return num_elements + 1 > (num_slots_minus_one + 1) * static_cast(_max_load_factor); + } + + void swap_pointers(sherwood_v8_table & other) + { + using std::swap; + swap(hash_policy, other.hash_policy); + swap(entries, other.entries); + swap(num_slots_minus_one, other.num_slots_minus_one); + swap(num_elements, other.num_elements); +#ifndef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION + swap(_max_load_factor, other._max_load_factor); +#endif + } + + struct LinkedListIt + { + size_t index = 0; + BlockPointer block = nullptr; + + LinkedListIt() + { + } + LinkedListIt(size_t index, BlockPointer block) + : index(index), block(block) + { + } + + iterator it() const + { + return { block, index }; + } + int index_in_block() const + { + return index % BlockSize; + } + bool is_direct_hit() const + { + return (metadata() & Constants::bits_for_direct_hit) == Constants::magic_for_direct_hit; + } + bool is_empty() const + { + return metadata() == Constants::magic_for_empty; + } + bool has_next() const + { + return jump_index() != 0; + } + int8_t jump_index() const + { + return Constants::distance_from_metadata(metadata()); + } + int8_t metadata() const + { + return block->control_bytes[index_in_block()]; + } + void set_metadata(int8_t metadata) + { + block->control_bytes[index_in_block()] = metadata; + } + + LinkedListIt next(sherwood_v8_table & table) const + { + int8_t distance = jump_index(); + size_t next_index = table.hash_policy.keep_in_range(index + Constants::jump_distances[distance], table.num_slots_minus_one); + return { next_index, table.entries + next_index / BlockSize }; + } + void set_next(int8_t jump_index) + { + int8_t & metadata = block->control_bytes[index_in_block()]; + metadata = (metadata & ~Constants::bits_for_distance) | jump_index; + } + void clear_next() + { + set_next(0); + } + + value_type & operator*() const + { + return block->data[index_in_block()]; + } + bool operator!() const + { + return !block; + } + explicit operator bool() const + { + return block != nullptr; + } + bool operator==(const LinkedListIt & other) const + { + return index == other.index; + } + bool operator!=(const LinkedListIt & other) const + { + return !(*this == other); + } + }; + + template + SKA_NOINLINE(std::pair) emplace_direct_hit(LinkedListIt block, Args &&... 
args) + { + using std::swap; + if (is_full()) + { + grow(); + return emplace(std::forward(args)...); + } + if (block.metadata() == Constants::magic_for_empty) + { + AllocatorTraits::construct(*this, std::addressof(*block), std::forward(args)...); + block.set_metadata(Constants::magic_for_direct_hit); + ++num_elements; + return { block.it(), true }; + } + else + { + LinkedListIt parent_block = find_parent_block(block); + std::pair free_block = find_free_index(parent_block); + if (!free_block.first) + { + grow(); + return emplace(std::forward(args)...); + } + value_type new_value(std::forward(args)...); + for (LinkedListIt it = block;;) + { + AllocatorTraits::construct(*this, std::addressof(*free_block.second), std::move(*it)); + AllocatorTraits::destroy(*this, std::addressof(*it)); + parent_block.set_next(free_block.first); + free_block.second.set_metadata(Constants::magic_for_list_entry); + if (!it.has_next()) + { + it.set_metadata(Constants::magic_for_empty); + break; + } + LinkedListIt next = it.next(*this); + it.set_metadata(Constants::magic_for_empty); + block.set_metadata(Constants::magic_for_reserved); + it = next; + parent_block = free_block.second; + free_block = find_free_index(free_block.second); + if (!free_block.first) + { + grow(); + return emplace(std::move(new_value)); + } + } + AllocatorTraits::construct(*this, std::addressof(*block), std::move(new_value)); + block.set_metadata(Constants::magic_for_direct_hit); + ++num_elements; + return { block.it(), true }; + } + } + + template + SKA_NOINLINE(std::pair) emplace_new_key(LinkedListIt parent, Args &&... args) + { + if (is_full()) + { + grow(); + return emplace(std::forward(args)...); + } + std::pair free_block = find_free_index(parent); + if (!free_block.first) + { + grow(); + return emplace(std::forward(args)...); + } + AllocatorTraits::construct(*this, std::addressof(*free_block.second), std::forward(args)...); + free_block.second.set_metadata(Constants::magic_for_list_entry); + parent.set_next(free_block.first); + ++num_elements; + return { free_block.second.it(), true }; + } + + LinkedListIt find_direct_hit(LinkedListIt child) const + { + size_t to_move_hash = hash_object(*child); + size_t to_move_index = hash_policy.index_for_hash(to_move_hash, num_slots_minus_one); + return { to_move_index, entries + to_move_index / BlockSize }; + } + LinkedListIt find_parent_block(LinkedListIt child) + { + LinkedListIt parent_block = find_direct_hit(child); + for (;;) + { + LinkedListIt next = parent_block.next(*this); + if (next == child) + return parent_block; + parent_block = next; + } + } + + std::pair find_free_index(LinkedListIt parent) const + { + for (int8_t jump_index = 1; jump_index < Constants::num_jump_distances; ++jump_index) + { + size_t index = hash_policy.keep_in_range(parent.index + Constants::jump_distances[jump_index], num_slots_minus_one); + BlockPointer block = entries + index / BlockSize; + if (block->control_bytes[index % BlockSize] == Constants::magic_for_empty) + return { jump_index, { index, block } }; + } + return { 0, {} }; + } + + void grow() + { + rehash(std::max(size_t(10), 2 * bucket_count())); + } + + size_t calculate_memory_requirement(size_t num_blocks) + { + size_t memory_required = sizeof(BlockType) * num_blocks; + memory_required += BlockSize; // for metadata of past-the-end pointer + return memory_required; + } + + void deallocate_data(BlockPointer begin, size_t num_slots_minus_one) + { + if (begin == BlockType::empty_block()) + return; + + ++num_slots_minus_one; + size_t num_blocks = 
num_slots_minus_one / BlockSize; + if (num_slots_minus_one % BlockSize) + ++num_blocks; + size_t memory = calculate_memory_requirement(num_blocks); + unsigned char * as_byte_pointer = reinterpret_cast(begin); + AllocatorTraits::deallocate(*this, typename AllocatorTraits::pointer(as_byte_pointer), memory); + } + + void reset_to_empty_state() + { + deallocate_data(entries, num_slots_minus_one); + entries = BlockType::empty_block(); + num_slots_minus_one = 0; + hash_policy.reset(); + } + + template + size_t hash_object(const U & key) + { + return static_cast(*this)(key); + } + template + size_t hash_object(const U & key) const + { + return static_cast(*this)(key); + } + template + bool compares_equal(const L & lhs, const R & rhs) + { + return static_cast(*this)(lhs, rhs); + } + + struct convertible_to_iterator + { + BlockPointer it; + size_t index; + + operator iterator() + { + if (it->control_bytes[index % BlockSize] == Constants::magic_for_empty) + return ++iterator{it, index}; + else + return { it, index }; + } + operator const_iterator() + { + if (it->control_bytes[index % BlockSize] == Constants::magic_for_empty) + return ++iterator{it, index}; + else + return { it, index }; + } + }; +}; +template +struct AlignmentOr8Bytes +{ + static constexpr size_t value = 8; +}; +template +struct AlignmentOr8Bytes= 1>::type> +{ + static constexpr size_t value = alignof(T); +}; +template +struct CalculateBytellBlockSize; +template +struct CalculateBytellBlockSize +{ + static constexpr size_t this_value = AlignmentOr8Bytes::value; + static constexpr size_t base_value = CalculateBytellBlockSize::value; + static constexpr size_t value = this_value > base_value ? this_value : base_value; +}; +template<> +struct CalculateBytellBlockSize<> +{ + static constexpr size_t value = 8; +}; +} + +template, typename E = std::equal_to, typename A = std::allocator > > +class bytell_hash_map + : public detailv8::sherwood_v8_table + < + std::pair, + K, + H, + detailv8::KeyOrValueHasher, H>, + E, + detailv8::KeyOrValueEquality, E>, + A, + typename std::allocator_traits::template rebind_alloc, + detailv8::CalculateBytellBlockSize::value + > +{ + using Table = detailv8::sherwood_v8_table + < + std::pair, + K, + H, + detailv8::KeyOrValueHasher, H>, + E, + detailv8::KeyOrValueEquality, E>, + A, + typename std::allocator_traits::template rebind_alloc, + detailv8::CalculateBytellBlockSize::value + >; +public: + + using key_type = K; + using mapped_type = V; + + using Table::Table; + bytell_hash_map() + { + } + + inline V & operator[](const K & key) + { + return emplace(key, convertible_to_value()).first->second; + } + inline V & operator[](K && key) + { + return emplace(std::move(key), convertible_to_value()).first->second; + } + V & at(const K & key) + { + auto found = this->find(key); + if (found == this->end()) + throw std::out_of_range("Argument passed to at() was not in the map."); + return found->second; + } + const V & at(const K & key) const + { + auto found = this->find(key); + if (found == this->end()) + throw std::out_of_range("Argument passed to at() was not in the map."); + return found->second; + } + + using Table::emplace; + std::pair emplace() + { + return emplace(key_type(), convertible_to_value()); + } + template + std::pair insert_or_assign(const key_type & key, M && m) + { + auto emplace_result = emplace(key, std::forward(m)); + if (!emplace_result.second) + emplace_result.first->second = std::forward(m); + return emplace_result; + } + template + std::pair insert_or_assign(key_type && key, M && m) + { + 
auto emplace_result = emplace(std::move(key), std::forward(m)); + if (!emplace_result.second) + emplace_result.first->second = std::forward(m); + return emplace_result; + } + template + typename Table::iterator insert_or_assign(typename Table::const_iterator, const key_type & key, M && m) + { + return insert_or_assign(key, std::forward(m)).first; + } + template + typename Table::iterator insert_or_assign(typename Table::const_iterator, key_type && key, M && m) + { + return insert_or_assign(std::move(key), std::forward(m)).first; + } + + friend bool operator==(const bytell_hash_map & lhs, const bytell_hash_map & rhs) + { + if (lhs.size() != rhs.size()) + return false; + for (const typename Table::value_type & value : lhs) + { + auto found = rhs.find(value.first); + if (found == rhs.end()) + return false; + else if (value.second != found->second) + return false; + } + return true; + } + friend bool operator!=(const bytell_hash_map & lhs, const bytell_hash_map & rhs) + { + return !(lhs == rhs); + } + +private: + struct convertible_to_value + { + operator V() const + { + return V(); + } + }; +}; + +template, typename E = std::equal_to, typename A = std::allocator > +class bytell_hash_set + : public detailv8::sherwood_v8_table + < + T, + T, + H, + detailv8::functor_storage, + E, + detailv8::functor_storage, + A, + typename std::allocator_traits::template rebind_alloc, + detailv8::CalculateBytellBlockSize::value + > +{ + using Table = detailv8::sherwood_v8_table + < + T, + T, + H, + detailv8::functor_storage, + E, + detailv8::functor_storage, + A, + typename std::allocator_traits::template rebind_alloc, + detailv8::CalculateBytellBlockSize::value + >; +public: + + using key_type = T; + + using Table::Table; + bytell_hash_set() + { + } + + template + std::pair emplace(Args &&... args) + { + return Table::emplace(T(std::forward(args)...)); + } + std::pair emplace(const key_type & arg) + { + return Table::emplace(arg); + } + std::pair emplace(key_type & arg) + { + return Table::emplace(arg); + } + std::pair emplace(const key_type && arg) + { + return Table::emplace(std::move(arg)); + } + std::pair emplace(key_type && arg) + { + return Table::emplace(std::move(arg)); + } + + friend bool operator==(const bytell_hash_set & lhs, const bytell_hash_set & rhs) + { + if (lhs.size() != rhs.size()) + return false; + for (const T & value : lhs) + { + if (rhs.find(value) == rhs.end()) + return false; + } + return true; + } + friend bool operator!=(const bytell_hash_set & lhs, const bytell_hash_set & rhs) + { + return !(lhs == rhs); + } +}; + +} // end namespace ska + +#ifdef BYTELL_HASH_MAP_AMALGAM_MEM_REDUCTION +#pragma pack(pop) +#endif diff --git a/src/3rd_party/skarupke_maps/flat_hash_map.hpp b/src/3rd_party/skarupke_maps/flat_hash_map.hpp new file mode 100644 index 00000000..222fc901 --- /dev/null +++ b/src/3rd_party/skarupke_maps/flat_hash_map.hpp @@ -0,0 +1,1528 @@ +//This library has been edited by Amalgam to reduce memory use +// by making _max_load_factor a constant +// remove the FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION definition to get +// the original behavior +#define FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + +// Copyright Malte Skarupke 2017. +// Distributed under the Boost Software License, Version 1.0. +// (See http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#define SKA_NOINLINE(...) __declspec(noinline) __VA_ARGS__ +#else +#define SKA_NOINLINE(...) 
__VA_ARGS__ __attribute__((noinline)) +#endif + +#ifdef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION +//pack alignment tight +#pragma pack(push, 1) +#endif + +namespace ska +{ +struct prime_number_hash_policy; +struct power_of_two_hash_policy; +struct fibonacci_hash_policy; + +namespace detailv3 +{ +template +struct functor_storage : Functor +{ + functor_storage() = default; + functor_storage(const Functor & functor) + : Functor(functor) + { + } + template + Result operator()(Args &&... args) + { + return static_cast(*this)(std::forward(args)...); + } + template + Result operator()(Args &&... args) const + { + return static_cast(*this)(std::forward(args)...); + } +}; +template +struct functor_storage +{ + typedef Result (*function_ptr)(Args...); + function_ptr function; + functor_storage(function_ptr function) + : function(function) + { + } + Result operator()(Args... args) const + { + return function(std::forward(args)...); + } + operator function_ptr &() + { + return function; + } + operator const function_ptr &() + { + return function; + } +}; +template +struct KeyOrValueHasher : functor_storage +{ + typedef functor_storage hasher_storage; + KeyOrValueHasher() = default; + KeyOrValueHasher(const hasher & hash) + : hasher_storage(hash) + { + } + size_t operator()(const key_type & key) + { + return static_cast(*this)(key); + } + size_t operator()(const key_type & key) const + { + return static_cast(*this)(key); + } + size_t operator()(const value_type & value) + { + return static_cast(*this)(value.first); + } + size_t operator()(const value_type & value) const + { + return static_cast(*this)(value.first); + } + template + size_t operator()(const std::pair & value) + { + return static_cast(*this)(value.first); + } + template + size_t operator()(const std::pair & value) const + { + return static_cast(*this)(value.first); + } +}; +template +struct KeyOrValueEquality : functor_storage +{ + typedef functor_storage equality_storage; + KeyOrValueEquality() = default; + KeyOrValueEquality(const key_equal & equality) + : equality_storage(equality) + { + } + bool operator()(const key_type & lhs, const key_type & rhs) + { + return static_cast(*this)(lhs, rhs); + } + bool operator()(const key_type & lhs, const value_type & rhs) + { + return static_cast(*this)(lhs, rhs.first); + } + bool operator()(const value_type & lhs, const key_type & rhs) + { + return static_cast(*this)(lhs.first, rhs); + } + bool operator()(const value_type & lhs, const value_type & rhs) + { + return static_cast(*this)(lhs.first, rhs.first); + } + template + bool operator()(const key_type & lhs, const std::pair & rhs) + { + return static_cast(*this)(lhs, rhs.first); + } + template + bool operator()(const std::pair & lhs, const key_type & rhs) + { + return static_cast(*this)(lhs.first, rhs); + } + template + bool operator()(const value_type & lhs, const std::pair & rhs) + { + return static_cast(*this)(lhs.first, rhs.first); + } + template + bool operator()(const std::pair & lhs, const value_type & rhs) + { + return static_cast(*this)(lhs.first, rhs.first); + } + template + bool operator()(const std::pair & lhs, const std::pair & rhs) + { + return static_cast(*this)(lhs.first, rhs.first); + } +}; +static constexpr int8_t min_lookups = 4; +template +struct sherwood_v3_entry +{ + sherwood_v3_entry() + { + } + sherwood_v3_entry(int8_t distance_from_desired) + : distance_from_desired(distance_from_desired) + { + } + ~sherwood_v3_entry() + { + } + static sherwood_v3_entry * empty_default_table() + { + static sherwood_v3_entry 
result[min_lookups] = { {}, {}, {}, {special_end_value} }; + return result; + } + + bool has_value() const + { + return distance_from_desired >= 0; + } + bool is_empty() const + { + return distance_from_desired < 0; + } + bool is_at_desired_position() const + { + return distance_from_desired <= 0; + } + template + void emplace(int8_t distance, Args &&... args) + { + new (std::addressof(value)) T(std::forward(args)...); + distance_from_desired = distance; + } + + void destroy_value() + { + value.~T(); + distance_from_desired = -1; + } + + int8_t distance_from_desired = -1; + static constexpr int8_t special_end_value = 0; + union { T value; }; +}; + +inline int8_t log2(size_t value) +{ + static constexpr int8_t table[64] = + { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5 + }; + value |= value >> 1; + value |= value >> 2; + value |= value >> 4; + value |= value >> 8; + value |= value >> 16; + value |= value >> 32; + return table[((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58]; +} + +template +struct AssignIfTrue +{ + void operator()(T & lhs, const T & rhs) + { + lhs = rhs; + } + void operator()(T & lhs, T && rhs) + { + lhs = std::move(rhs); + } +}; +template +struct AssignIfTrue +{ + void operator()(T &, const T &) + { + } + void operator()(T &, T &&) + { + } +}; + +inline size_t next_power_of_two(size_t i) +{ + --i; + i |= i >> 1; + i |= i >> 2; + i |= i >> 4; + i |= i >> 8; + i |= i >> 16; + i |= i >> 32; + ++i; + return i; +} + +template using void_t = void; + +template +struct HashPolicySelector +{ + typedef fibonacci_hash_policy type; +}; +template +struct HashPolicySelector> +{ + typedef typename T::hash_policy type; +}; + +template +class sherwood_v3_table : private EntryAlloc, private Hasher, private Equal +{ + using Entry = detailv3::sherwood_v3_entry; + using AllocatorTraits = std::allocator_traits; + using EntryPointer = typename AllocatorTraits::pointer; + struct convertible_to_iterator; + +public: + +#ifdef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + static constexpr float _max_load_factor = 0.5f; +#endif + + using value_type = T; + using size_type = size_t; + using difference_type = std::ptrdiff_t; + using hasher = ArgumentHash; + using key_equal = ArgumentEqual; + using allocator_type = EntryAlloc; + using reference = value_type &; + using const_reference = const value_type &; + using pointer = value_type *; + using const_pointer = const value_type *; + + sherwood_v3_table() + { + } + explicit sherwood_v3_table(size_type bucket_count, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) + : EntryAlloc(alloc), Hasher(hash), Equal(equal) + { + rehash(bucket_count); + } + sherwood_v3_table(size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v3_table(bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + sherwood_v3_table(size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v3_table(bucket_count, hash, ArgumentEqual(), alloc) + { + } + explicit sherwood_v3_table(const ArgumentAlloc & alloc) + : EntryAlloc(alloc) + { + } + template + sherwood_v3_table(It first, It last, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = 
ArgumentAlloc()) + : sherwood_v3_table(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + template + sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v3_table(first, last, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + template + sherwood_v3_table(It first, It last, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v3_table(first, last, bucket_count, hash, ArgumentEqual(), alloc) + { + } + sherwood_v3_table(std::initializer_list il, size_type bucket_count = 0, const ArgumentHash & hash = ArgumentHash(), const ArgumentEqual & equal = ArgumentEqual(), const ArgumentAlloc & alloc = ArgumentAlloc()) + : sherwood_v3_table(bucket_count, hash, equal, alloc) + { + if (bucket_count == 0) + rehash(il.size()); + insert(il.begin(), il.end()); + } + sherwood_v3_table(std::initializer_list il, size_type bucket_count, const ArgumentAlloc & alloc) + : sherwood_v3_table(il, bucket_count, ArgumentHash(), ArgumentEqual(), alloc) + { + } + sherwood_v3_table(std::initializer_list il, size_type bucket_count, const ArgumentHash & hash, const ArgumentAlloc & alloc) + : sherwood_v3_table(il, bucket_count, hash, ArgumentEqual(), alloc) + { + } + sherwood_v3_table(const sherwood_v3_table & other) + : sherwood_v3_table(other, AllocatorTraits::select_on_container_copy_construction(other.get_allocator())) + { + } + sherwood_v3_table(const sherwood_v3_table & other, const ArgumentAlloc & alloc) + : EntryAlloc(alloc), Hasher(other), Equal(other) +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + , _max_load_factor(other._max_load_factor) +#endif + { + rehash_for_other_container(other); + try + { + insert(other.begin(), other.end()); + } + catch(...) + { + clear(); + deallocate_data(entries, num_slots_minus_one, max_lookups); + throw; + } + } + sherwood_v3_table(sherwood_v3_table && other) noexcept + : EntryAlloc(std::move(other)), Hasher(std::move(other)), Equal(std::move(other)) + { + swap_pointers(other); + } + sherwood_v3_table(sherwood_v3_table && other, const ArgumentAlloc & alloc) noexcept + : EntryAlloc(alloc), Hasher(std::move(other)), Equal(std::move(other)) + { + swap_pointers(other); + } + sherwood_v3_table & operator=(const sherwood_v3_table & other) + { + if (this == std::addressof(other)) + return *this; + + clear(); + if (AllocatorTraits::propagate_on_container_copy_assignment::value) + { + if (static_cast(*this) != static_cast(other)) + { + reset_to_empty_state(); + } + AssignIfTrue()(*this, other); + } +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = other._max_load_factor; +#endif + static_cast(*this) = other; + static_cast(*this) = other; + rehash_for_other_container(other); + insert(other.begin(), other.end()); + return *this; + } + sherwood_v3_table & operator=(sherwood_v3_table && other) noexcept + { + if (this == std::addressof(other)) + return *this; + else if (AllocatorTraits::propagate_on_container_move_assignment::value) + { + clear(); + reset_to_empty_state(); + AssignIfTrue()(*this, std::move(other)); + swap_pointers(other); + } + else if (static_cast(*this) == static_cast(other)) + { + swap_pointers(other); + } + else + { + clear(); +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = other._max_load_factor; +#endif + rehash_for_other_container(other); + for (T & elem : other) + emplace(std::move(elem)); + other.clear(); + } + static_cast(*this) = std::move(other); + static_cast(*this) = std::move(other); + return *this; + } + 
~sherwood_v3_table() + { + clear(); + deallocate_data(entries, num_slots_minus_one, max_lookups); + } + + const allocator_type & get_allocator() const + { + return static_cast(*this); + } + const ArgumentEqual & key_eq() const + { + return static_cast(*this); + } + const ArgumentHash & hash_function() const + { + return static_cast(*this); + } + + template + struct templated_iterator + { + templated_iterator() = default; + templated_iterator(EntryPointer current) + : current(current) + { + } + EntryPointer current = EntryPointer(); + + using iterator_category = std::forward_iterator_tag; + using value_type = ValueType; + using difference_type = ptrdiff_t; + using pointer = ValueType *; + using reference = ValueType &; + + friend bool operator==(const templated_iterator & lhs, const templated_iterator & rhs) + { + return lhs.current == rhs.current; + } + friend bool operator!=(const templated_iterator & lhs, const templated_iterator & rhs) + { + return !(lhs == rhs); + } + + templated_iterator & operator++() + { + do + { + ++current; + } + while(current->is_empty()); + return *this; + } + templated_iterator operator++(int) + { + templated_iterator copy(*this); + ++*this; + return copy; + } + + ValueType & operator*() const + { + return current->value; + } + ValueType * operator->() const + { + return std::addressof(current->value); + } + + operator templated_iterator() const + { + return { current }; + } + }; + using iterator = templated_iterator; + using const_iterator = templated_iterator; + + iterator begin() + { + for (EntryPointer it = entries;; ++it) + { + if (it->has_value()) + return { it }; + } + } + const_iterator begin() const + { + for (EntryPointer it = entries;; ++it) + { + if (it->has_value()) + return { it }; + } + } + const_iterator cbegin() const + { + return begin(); + } + iterator end() + { + return { entries + static_cast(num_slots_minus_one + max_lookups) }; + } + const_iterator end() const + { + return { entries + static_cast(num_slots_minus_one + max_lookups) }; + } + const_iterator cend() const + { + return end(); + } + + iterator find(const FindKey & key) + { + size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); + EntryPointer it = entries + ptrdiff_t(index); + for (int8_t distance = 0; it->distance_from_desired >= distance; ++distance, ++it) + { + if (compares_equal(key, it->value)) + return { it }; + } + return end(); + } + const_iterator find(const FindKey & key) const + { + return const_cast(this)->find(key); + } + size_t count(const FindKey & key) const + { + return find(key) == end() ? 0 : 1; + } + std::pair equal_range(const FindKey & key) + { + iterator found = find(key); + if (found == end()) + return { found, found }; + else + return { found, std::next(found) }; + } + std::pair equal_range(const FindKey & key) const + { + const_iterator found = find(key); + if (found == end()) + return { found, found }; + else + return { found, std::next(found) }; + } + + template + std::pair emplace(Key && key, Args &&... 
args) + { + size_t index = hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); + EntryPointer current_entry = entries + ptrdiff_t(index); + int8_t distance_from_desired = 0; + for (; current_entry->distance_from_desired >= distance_from_desired; ++current_entry, ++distance_from_desired) + { + if (compares_equal(key, current_entry->value)) + return { { current_entry }, false }; + } + return emplace_new_key(distance_from_desired, current_entry, std::forward(key), std::forward(args)...); + } + + std::pair insert(const value_type & value) + { + return emplace(value); + } + std::pair insert(value_type && value) + { + return emplace(std::move(value)); + } + template + iterator emplace_hint(const_iterator, Args &&... args) + { + return emplace(std::forward(args)...).first; + } + iterator insert(const_iterator, const value_type & value) + { + return emplace(value).first; + } + iterator insert(const_iterator, value_type && value) + { + return emplace(std::move(value)).first; + } + + template + void insert(It begin, It end) + { + for (; begin != end; ++begin) + { + emplace(*begin); + } + } + void insert(std::initializer_list il) + { + insert(il.begin(), il.end()); + } + + void rehash(size_t num_buckets) + { + num_buckets = std::max(num_buckets, static_cast(std::ceil(num_elements / static_cast(_max_load_factor)))); + if (num_buckets == 0) + { + reset_to_empty_state(); + return; + } + auto new_prime_index = hash_policy.next_size_over(num_buckets); + if (num_buckets == bucket_count()) + return; + int8_t new_max_lookups = compute_max_lookups(num_buckets); + EntryPointer new_buckets(AllocatorTraits::allocate(*this, num_buckets + new_max_lookups)); + EntryPointer special_end_item = new_buckets + static_cast(num_buckets + new_max_lookups - 1); + for (EntryPointer it = new_buckets; it != special_end_item; ++it) + it->distance_from_desired = -1; + special_end_item->distance_from_desired = Entry::special_end_value; + std::swap(entries, new_buckets); + std::swap(num_slots_minus_one, num_buckets); + --num_slots_minus_one; + hash_policy.commit(new_prime_index); + int8_t old_max_lookups = max_lookups; + max_lookups = new_max_lookups; + num_elements = 0; + for (EntryPointer it = new_buckets, end = it + static_cast(num_buckets + old_max_lookups); it != end; ++it) + { + if (it->has_value()) + { + emplace(std::move(it->value)); + it->destroy_value(); + } + } + deallocate_data(new_buckets, num_buckets, old_max_lookups); + } + + void reserve(size_t num_elements) + { + size_t required_buckets = num_buckets_for_reserve(num_elements); + if (required_buckets > bucket_count()) + rehash(required_buckets); + } + + // the return value is a type that can be converted to an iterator + // the reason for doing this is that it's not free to find the + // iterator pointing at the next element. 
if you care about the + // next iterator, turn the return value into an iterator + convertible_to_iterator erase(const_iterator to_erase) + { + EntryPointer current = to_erase.current; + current->destroy_value(); + --num_elements; + for (EntryPointer next = current + ptrdiff_t(1); !next->is_at_desired_position(); ++current, ++next) + { + current->emplace(next->distance_from_desired - 1, std::move(next->value)); + next->destroy_value(); + } + return { to_erase.current }; + } + + iterator erase(const_iterator begin_it, const_iterator end_it) + { + if (begin_it == end_it) + return { begin_it.current }; + for (EntryPointer it = begin_it.current, end = end_it.current; it != end; ++it) + { + if (it->has_value()) + { + it->destroy_value(); + --num_elements; + } + } + if (end_it == this->end()) + return this->end(); + ptrdiff_t num_to_move = std::min(static_cast(end_it.current->distance_from_desired), end_it.current - begin_it.current); + EntryPointer to_return = end_it.current - num_to_move; + for (EntryPointer it = end_it.current; !it->is_at_desired_position();) + { + EntryPointer target = it - num_to_move; + target->emplace(it->distance_from_desired - num_to_move, std::move(it->value)); + it->destroy_value(); + ++it; + num_to_move = std::min(static_cast(it->distance_from_desired), num_to_move); + } + return { to_return }; + } + + size_t erase(const FindKey & key) + { + auto found = find(key); + if (found == end()) + return 0; + else + { + erase(found); + return 1; + } + } + + void clear() + { + for (EntryPointer it = entries, end = it + static_cast(num_slots_minus_one + max_lookups); it != end; ++it) + { + if (it->has_value()) + it->destroy_value(); + } + num_elements = 0; + } + + void shrink_to_fit() + { + rehash_for_other_container(*this); + } + + void swap(sherwood_v3_table & other) + { + using std::swap; + swap_pointers(other); + swap(static_cast(*this), static_cast(other)); + swap(static_cast(*this), static_cast(other)); + if (AllocatorTraits::propagate_on_container_swap::value) + swap(static_cast(*this), static_cast(other)); + } + + size_t size() const + { + return num_elements; + } + size_t max_size() const + { + return (AllocatorTraits::max_size(*this)) / sizeof(Entry); + } + size_t bucket_count() const + { + return num_slots_minus_one ? 
num_slots_minus_one + 1 : 0; + } + size_type max_bucket_count() const + { + return (AllocatorTraits::max_size(*this) - min_lookups) / sizeof(Entry); + } + size_t bucket(const FindKey & key) const + { + return hash_policy.index_for_hash(hash_object(key), num_slots_minus_one); + } + float load_factor() const + { + size_t buckets = bucket_count(); + if (buckets) + return static_cast(num_elements) / bucket_count(); + else + return 0; + } + void max_load_factor(float value) + { +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + _max_load_factor = value; +#endif + } + float max_load_factor() const + { + return _max_load_factor; + } + + bool empty() const + { + return num_elements == 0; + } + +private: + EntryPointer entries = Entry::empty_default_table(); + size_t num_slots_minus_one = 0; + typename HashPolicySelector::type hash_policy; +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + float _max_load_factor = 0.5f; +#endif + size_t num_elements = 0; + int8_t max_lookups = detailv3::min_lookups - 1; + + static int8_t compute_max_lookups(size_t num_buckets) + { + int8_t desired = detailv3::log2(num_buckets); + return std::max(detailv3::min_lookups, desired); + } + + size_t num_buckets_for_reserve(size_t num_elements) const + { + return static_cast(std::ceil(num_elements / std::min(0.5, static_cast(_max_load_factor)))); + } + void rehash_for_other_container(const sherwood_v3_table & other) + { + rehash(std::min(num_buckets_for_reserve(other.size()), other.bucket_count())); + } + + void swap_pointers(sherwood_v3_table & other) + { + using std::swap; + swap(hash_policy, other.hash_policy); + swap(entries, other.entries); + swap(num_slots_minus_one, other.num_slots_minus_one); + swap(num_elements, other.num_elements); + swap(max_lookups, other.max_lookups); +#ifndef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION + swap(_max_load_factor, other._max_load_factor); +#endif + } + + template + SKA_NOINLINE(std::pair) emplace_new_key(int8_t distance_from_desired, EntryPointer current_entry, Key && key, Args &&... 
args) + { + using std::swap; + if (num_slots_minus_one == 0 || distance_from_desired == max_lookups || num_elements + 1 > (num_slots_minus_one + 1) * static_cast(_max_load_factor)) + { + grow(); + return emplace(std::forward(key), std::forward(args)...); + } + else if (current_entry->is_empty()) + { + current_entry->emplace(distance_from_desired, std::forward(key), std::forward(args)...); + ++num_elements; + return { { current_entry }, true }; + } + value_type to_insert(std::forward(key), std::forward(args)...); + swap(distance_from_desired, current_entry->distance_from_desired); + swap(to_insert, current_entry->value); + iterator result = { current_entry }; + for (++distance_from_desired, ++current_entry;; ++current_entry) + { + if (current_entry->is_empty()) + { + current_entry->emplace(distance_from_desired, std::move(to_insert)); + ++num_elements; + return { result, true }; + } + else if (current_entry->distance_from_desired < distance_from_desired) + { + swap(distance_from_desired, current_entry->distance_from_desired); + swap(to_insert, current_entry->value); + ++distance_from_desired; + } + else + { + ++distance_from_desired; + if (distance_from_desired == max_lookups) + { + swap(to_insert, result.current->value); + grow(); + return emplace(std::move(to_insert)); + } + } + } + } + + void grow() + { + rehash(std::max(size_t(4), 2 * bucket_count())); + } + + void deallocate_data(EntryPointer begin, size_t num_slots_minus_one, int8_t max_lookups) + { + if (begin != Entry::empty_default_table()) + { + AllocatorTraits::deallocate(*this, begin, num_slots_minus_one + max_lookups + 1); + } + } + + void reset_to_empty_state() + { + deallocate_data(entries, num_slots_minus_one, max_lookups); + entries = Entry::empty_default_table(); + num_slots_minus_one = 0; + hash_policy.reset(); + max_lookups = detailv3::min_lookups - 1; + } + + template + size_t hash_object(const U & key) + { + return static_cast(*this)(key); + } + template + size_t hash_object(const U & key) const + { + return static_cast(*this)(key); + } + template + bool compares_equal(const L & lhs, const R & rhs) + { + return static_cast(*this)(lhs, rhs); + } + + struct convertible_to_iterator + { + EntryPointer it; + + operator iterator() + { + if (it->has_value()) + return { it }; + else + return ++iterator{it}; + } + operator const_iterator() + { + if (it->has_value()) + return { it }; + else + return ++const_iterator{it}; + } + }; + +}; +} + +struct prime_number_hash_policy +{ + static size_t mod0(size_t) { return 0llu; } + static size_t mod2(size_t hash) { return hash % 2llu; } + static size_t mod3(size_t hash) { return hash % 3llu; } + static size_t mod5(size_t hash) { return hash % 5llu; } + static size_t mod7(size_t hash) { return hash % 7llu; } + static size_t mod11(size_t hash) { return hash % 11llu; } + static size_t mod13(size_t hash) { return hash % 13llu; } + static size_t mod17(size_t hash) { return hash % 17llu; } + static size_t mod23(size_t hash) { return hash % 23llu; } + static size_t mod29(size_t hash) { return hash % 29llu; } + static size_t mod37(size_t hash) { return hash % 37llu; } + static size_t mod47(size_t hash) { return hash % 47llu; } + static size_t mod59(size_t hash) { return hash % 59llu; } + static size_t mod73(size_t hash) { return hash % 73llu; } + static size_t mod97(size_t hash) { return hash % 97llu; } + static size_t mod127(size_t hash) { return hash % 127llu; } + static size_t mod151(size_t hash) { return hash % 151llu; } + static size_t mod197(size_t hash) { return hash % 197llu; } + static 
size_t mod251(size_t hash) { return hash % 251llu; } + static size_t mod313(size_t hash) { return hash % 313llu; } + static size_t mod397(size_t hash) { return hash % 397llu; } + static size_t mod499(size_t hash) { return hash % 499llu; } + static size_t mod631(size_t hash) { return hash % 631llu; } + static size_t mod797(size_t hash) { return hash % 797llu; } + static size_t mod1009(size_t hash) { return hash % 1009llu; } + static size_t mod1259(size_t hash) { return hash % 1259llu; } + static size_t mod1597(size_t hash) { return hash % 1597llu; } + static size_t mod2011(size_t hash) { return hash % 2011llu; } + static size_t mod2539(size_t hash) { return hash % 2539llu; } + static size_t mod3203(size_t hash) { return hash % 3203llu; } + static size_t mod4027(size_t hash) { return hash % 4027llu; } + static size_t mod5087(size_t hash) { return hash % 5087llu; } + static size_t mod6421(size_t hash) { return hash % 6421llu; } + static size_t mod8089(size_t hash) { return hash % 8089llu; } + static size_t mod10193(size_t hash) { return hash % 10193llu; } + static size_t mod12853(size_t hash) { return hash % 12853llu; } + static size_t mod16193(size_t hash) { return hash % 16193llu; } + static size_t mod20399(size_t hash) { return hash % 20399llu; } + static size_t mod25717(size_t hash) { return hash % 25717llu; } + static size_t mod32401(size_t hash) { return hash % 32401llu; } + static size_t mod40823(size_t hash) { return hash % 40823llu; } + static size_t mod51437(size_t hash) { return hash % 51437llu; } + static size_t mod64811(size_t hash) { return hash % 64811llu; } + static size_t mod81649(size_t hash) { return hash % 81649llu; } + static size_t mod102877(size_t hash) { return hash % 102877llu; } + static size_t mod129607(size_t hash) { return hash % 129607llu; } + static size_t mod163307(size_t hash) { return hash % 163307llu; } + static size_t mod205759(size_t hash) { return hash % 205759llu; } + static size_t mod259229(size_t hash) { return hash % 259229llu; } + static size_t mod326617(size_t hash) { return hash % 326617llu; } + static size_t mod411527(size_t hash) { return hash % 411527llu; } + static size_t mod518509(size_t hash) { return hash % 518509llu; } + static size_t mod653267(size_t hash) { return hash % 653267llu; } + static size_t mod823117(size_t hash) { return hash % 823117llu; } + static size_t mod1037059(size_t hash) { return hash % 1037059llu; } + static size_t mod1306601(size_t hash) { return hash % 1306601llu; } + static size_t mod1646237(size_t hash) { return hash % 1646237llu; } + static size_t mod2074129(size_t hash) { return hash % 2074129llu; } + static size_t mod2613229(size_t hash) { return hash % 2613229llu; } + static size_t mod3292489(size_t hash) { return hash % 3292489llu; } + static size_t mod4148279(size_t hash) { return hash % 4148279llu; } + static size_t mod5226491(size_t hash) { return hash % 5226491llu; } + static size_t mod6584983(size_t hash) { return hash % 6584983llu; } + static size_t mod8296553(size_t hash) { return hash % 8296553llu; } + static size_t mod10453007(size_t hash) { return hash % 10453007llu; } + static size_t mod13169977(size_t hash) { return hash % 13169977llu; } + static size_t mod16593127(size_t hash) { return hash % 16593127llu; } + static size_t mod20906033(size_t hash) { return hash % 20906033llu; } + static size_t mod26339969(size_t hash) { return hash % 26339969llu; } + static size_t mod33186281(size_t hash) { return hash % 33186281llu; } + static size_t mod41812097(size_t hash) { return hash % 41812097llu; } + 
static size_t mod52679969(size_t hash) { return hash % 52679969llu; } + static size_t mod66372617(size_t hash) { return hash % 66372617llu; } + static size_t mod83624237(size_t hash) { return hash % 83624237llu; } + static size_t mod105359939(size_t hash) { return hash % 105359939llu; } + static size_t mod132745199(size_t hash) { return hash % 132745199llu; } + static size_t mod167248483(size_t hash) { return hash % 167248483llu; } + static size_t mod210719881(size_t hash) { return hash % 210719881llu; } + static size_t mod265490441(size_t hash) { return hash % 265490441llu; } + static size_t mod334496971(size_t hash) { return hash % 334496971llu; } + static size_t mod421439783(size_t hash) { return hash % 421439783llu; } + static size_t mod530980861(size_t hash) { return hash % 530980861llu; } + static size_t mod668993977(size_t hash) { return hash % 668993977llu; } + static size_t mod842879579(size_t hash) { return hash % 842879579llu; } + static size_t mod1061961721(size_t hash) { return hash % 1061961721llu; } + static size_t mod1337987929(size_t hash) { return hash % 1337987929llu; } + static size_t mod1685759167(size_t hash) { return hash % 1685759167llu; } + static size_t mod2123923447(size_t hash) { return hash % 2123923447llu; } + static size_t mod2675975881(size_t hash) { return hash % 2675975881llu; } + static size_t mod3371518343(size_t hash) { return hash % 3371518343llu; } + static size_t mod4247846927(size_t hash) { return hash % 4247846927llu; } + static size_t mod5351951779(size_t hash) { return hash % 5351951779llu; } + static size_t mod6743036717(size_t hash) { return hash % 6743036717llu; } + static size_t mod8495693897(size_t hash) { return hash % 8495693897llu; } + static size_t mod10703903591(size_t hash) { return hash % 10703903591llu; } + static size_t mod13486073473(size_t hash) { return hash % 13486073473llu; } + static size_t mod16991387857(size_t hash) { return hash % 16991387857llu; } + static size_t mod21407807219(size_t hash) { return hash % 21407807219llu; } + static size_t mod26972146961(size_t hash) { return hash % 26972146961llu; } + static size_t mod33982775741(size_t hash) { return hash % 33982775741llu; } + static size_t mod42815614441(size_t hash) { return hash % 42815614441llu; } + static size_t mod53944293929(size_t hash) { return hash % 53944293929llu; } + static size_t mod67965551447(size_t hash) { return hash % 67965551447llu; } + static size_t mod85631228929(size_t hash) { return hash % 85631228929llu; } + static size_t mod107888587883(size_t hash) { return hash % 107888587883llu; } + static size_t mod135931102921(size_t hash) { return hash % 135931102921llu; } + static size_t mod171262457903(size_t hash) { return hash % 171262457903llu; } + static size_t mod215777175787(size_t hash) { return hash % 215777175787llu; } + static size_t mod271862205833(size_t hash) { return hash % 271862205833llu; } + static size_t mod342524915839(size_t hash) { return hash % 342524915839llu; } + static size_t mod431554351609(size_t hash) { return hash % 431554351609llu; } + static size_t mod543724411781(size_t hash) { return hash % 543724411781llu; } + static size_t mod685049831731(size_t hash) { return hash % 685049831731llu; } + static size_t mod863108703229(size_t hash) { return hash % 863108703229llu; } + static size_t mod1087448823553(size_t hash) { return hash % 1087448823553llu; } + static size_t mod1370099663459(size_t hash) { return hash % 1370099663459llu; } + static size_t mod1726217406467(size_t hash) { return hash % 1726217406467llu; } + static 
size_t mod2174897647073(size_t hash) { return hash % 2174897647073llu; } + static size_t mod2740199326961(size_t hash) { return hash % 2740199326961llu; } + static size_t mod3452434812973(size_t hash) { return hash % 3452434812973llu; } + static size_t mod4349795294267(size_t hash) { return hash % 4349795294267llu; } + static size_t mod5480398654009(size_t hash) { return hash % 5480398654009llu; } + static size_t mod6904869625999(size_t hash) { return hash % 6904869625999llu; } + static size_t mod8699590588571(size_t hash) { return hash % 8699590588571llu; } + static size_t mod10960797308051(size_t hash) { return hash % 10960797308051llu; } + static size_t mod13809739252051(size_t hash) { return hash % 13809739252051llu; } + static size_t mod17399181177241(size_t hash) { return hash % 17399181177241llu; } + static size_t mod21921594616111(size_t hash) { return hash % 21921594616111llu; } + static size_t mod27619478504183(size_t hash) { return hash % 27619478504183llu; } + static size_t mod34798362354533(size_t hash) { return hash % 34798362354533llu; } + static size_t mod43843189232363(size_t hash) { return hash % 43843189232363llu; } + static size_t mod55238957008387(size_t hash) { return hash % 55238957008387llu; } + static size_t mod69596724709081(size_t hash) { return hash % 69596724709081llu; } + static size_t mod87686378464759(size_t hash) { return hash % 87686378464759llu; } + static size_t mod110477914016779(size_t hash) { return hash % 110477914016779llu; } + static size_t mod139193449418173(size_t hash) { return hash % 139193449418173llu; } + static size_t mod175372756929481(size_t hash) { return hash % 175372756929481llu; } + static size_t mod220955828033581(size_t hash) { return hash % 220955828033581llu; } + static size_t mod278386898836457(size_t hash) { return hash % 278386898836457llu; } + static size_t mod350745513859007(size_t hash) { return hash % 350745513859007llu; } + static size_t mod441911656067171(size_t hash) { return hash % 441911656067171llu; } + static size_t mod556773797672909(size_t hash) { return hash % 556773797672909llu; } + static size_t mod701491027718027(size_t hash) { return hash % 701491027718027llu; } + static size_t mod883823312134381(size_t hash) { return hash % 883823312134381llu; } + static size_t mod1113547595345903(size_t hash) { return hash % 1113547595345903llu; } + static size_t mod1402982055436147(size_t hash) { return hash % 1402982055436147llu; } + static size_t mod1767646624268779(size_t hash) { return hash % 1767646624268779llu; } + static size_t mod2227095190691797(size_t hash) { return hash % 2227095190691797llu; } + static size_t mod2805964110872297(size_t hash) { return hash % 2805964110872297llu; } + static size_t mod3535293248537579(size_t hash) { return hash % 3535293248537579llu; } + static size_t mod4454190381383713(size_t hash) { return hash % 4454190381383713llu; } + static size_t mod5611928221744609(size_t hash) { return hash % 5611928221744609llu; } + static size_t mod7070586497075177(size_t hash) { return hash % 7070586497075177llu; } + static size_t mod8908380762767489(size_t hash) { return hash % 8908380762767489llu; } + static size_t mod11223856443489329(size_t hash) { return hash % 11223856443489329llu; } + static size_t mod14141172994150357(size_t hash) { return hash % 14141172994150357llu; } + static size_t mod17816761525534927(size_t hash) { return hash % 17816761525534927llu; } + static size_t mod22447712886978529(size_t hash) { return hash % 22447712886978529llu; } + static size_t mod28282345988300791(size_t hash) 
{ return hash % 28282345988300791llu; } + static size_t mod35633523051069991(size_t hash) { return hash % 35633523051069991llu; } + static size_t mod44895425773957261(size_t hash) { return hash % 44895425773957261llu; } + static size_t mod56564691976601587(size_t hash) { return hash % 56564691976601587llu; } + static size_t mod71267046102139967(size_t hash) { return hash % 71267046102139967llu; } + static size_t mod89790851547914507(size_t hash) { return hash % 89790851547914507llu; } + static size_t mod113129383953203213(size_t hash) { return hash % 113129383953203213llu; } + static size_t mod142534092204280003(size_t hash) { return hash % 142534092204280003llu; } + static size_t mod179581703095829107(size_t hash) { return hash % 179581703095829107llu; } + static size_t mod226258767906406483(size_t hash) { return hash % 226258767906406483llu; } + static size_t mod285068184408560057(size_t hash) { return hash % 285068184408560057llu; } + static size_t mod359163406191658253(size_t hash) { return hash % 359163406191658253llu; } + static size_t mod452517535812813007(size_t hash) { return hash % 452517535812813007llu; } + static size_t mod570136368817120201(size_t hash) { return hash % 570136368817120201llu; } + static size_t mod718326812383316683(size_t hash) { return hash % 718326812383316683llu; } + static size_t mod905035071625626043(size_t hash) { return hash % 905035071625626043llu; } + static size_t mod1140272737634240411(size_t hash) { return hash % 1140272737634240411llu; } + static size_t mod1436653624766633509(size_t hash) { return hash % 1436653624766633509llu; } + static size_t mod1810070143251252131(size_t hash) { return hash % 1810070143251252131llu; } + static size_t mod2280545475268481167(size_t hash) { return hash % 2280545475268481167llu; } + static size_t mod2873307249533267101(size_t hash) { return hash % 2873307249533267101llu; } + static size_t mod3620140286502504283(size_t hash) { return hash % 3620140286502504283llu; } + static size_t mod4561090950536962147(size_t hash) { return hash % 4561090950536962147llu; } + static size_t mod5746614499066534157(size_t hash) { return hash % 5746614499066534157llu; } + static size_t mod7240280573005008577(size_t hash) { return hash % 7240280573005008577llu; } + static size_t mod9122181901073924329(size_t hash) { return hash % 9122181901073924329llu; } + static size_t mod11493228998133068689(size_t hash) { return hash % 11493228998133068689llu; } + static size_t mod14480561146010017169(size_t hash) { return hash % 14480561146010017169llu; } + static size_t mod18446744073709551557(size_t hash) { return hash % 18446744073709551557llu; } + + using mod_function = size_t (*)(size_t); + + mod_function next_size_over(size_t & size) const + { + // prime numbers generated by the following method: + // 1. start with a prime p = 2 + // 2. go to wolfram alpha and get p = NextPrime(2 * p) + // 3. repeat 2. until you overflow 64 bits + // you now have large gaps which you would hit if somebody called reserve() with an unlucky number. + // 4. to fill the gaps for every prime p go to wolfram alpha and get ClosestPrime(p * 2^(1/3)) and ClosestPrime(p * 2^(2/3)) and put those in the gaps + // 5. 
get PrevPrime(2^64) and put it at the end + static constexpr const size_t prime_list[] = + { + 2llu, 3llu, 5llu, 7llu, 11llu, 13llu, 17llu, 23llu, 29llu, 37llu, 47llu, + 59llu, 73llu, 97llu, 127llu, 151llu, 197llu, 251llu, 313llu, 397llu, + 499llu, 631llu, 797llu, 1009llu, 1259llu, 1597llu, 2011llu, 2539llu, + 3203llu, 4027llu, 5087llu, 6421llu, 8089llu, 10193llu, 12853llu, 16193llu, + 20399llu, 25717llu, 32401llu, 40823llu, 51437llu, 64811llu, 81649llu, + 102877llu, 129607llu, 163307llu, 205759llu, 259229llu, 326617llu, + 411527llu, 518509llu, 653267llu, 823117llu, 1037059llu, 1306601llu, + 1646237llu, 2074129llu, 2613229llu, 3292489llu, 4148279llu, 5226491llu, + 6584983llu, 8296553llu, 10453007llu, 13169977llu, 16593127llu, 20906033llu, + 26339969llu, 33186281llu, 41812097llu, 52679969llu, 66372617llu, + 83624237llu, 105359939llu, 132745199llu, 167248483llu, 210719881llu, + 265490441llu, 334496971llu, 421439783llu, 530980861llu, 668993977llu, + 842879579llu, 1061961721llu, 1337987929llu, 1685759167llu, 2123923447llu, + 2675975881llu, 3371518343llu, 4247846927llu, 5351951779llu, 6743036717llu, + 8495693897llu, 10703903591llu, 13486073473llu, 16991387857llu, + 21407807219llu, 26972146961llu, 33982775741llu, 42815614441llu, + 53944293929llu, 67965551447llu, 85631228929llu, 107888587883llu, + 135931102921llu, 171262457903llu, 215777175787llu, 271862205833llu, + 342524915839llu, 431554351609llu, 543724411781llu, 685049831731llu, + 863108703229llu, 1087448823553llu, 1370099663459llu, 1726217406467llu, + 2174897647073llu, 2740199326961llu, 3452434812973llu, 4349795294267llu, + 5480398654009llu, 6904869625999llu, 8699590588571llu, 10960797308051llu, + 13809739252051llu, 17399181177241llu, 21921594616111llu, 27619478504183llu, + 34798362354533llu, 43843189232363llu, 55238957008387llu, 69596724709081llu, + 87686378464759llu, 110477914016779llu, 139193449418173llu, + 175372756929481llu, 220955828033581llu, 278386898836457llu, + 350745513859007llu, 441911656067171llu, 556773797672909llu, + 701491027718027llu, 883823312134381llu, 1113547595345903llu, + 1402982055436147llu, 1767646624268779llu, 2227095190691797llu, + 2805964110872297llu, 3535293248537579llu, 4454190381383713llu, + 5611928221744609llu, 7070586497075177llu, 8908380762767489llu, + 11223856443489329llu, 14141172994150357llu, 17816761525534927llu, + 22447712886978529llu, 28282345988300791llu, 35633523051069991llu, + 44895425773957261llu, 56564691976601587llu, 71267046102139967llu, + 89790851547914507llu, 113129383953203213llu, 142534092204280003llu, + 179581703095829107llu, 226258767906406483llu, 285068184408560057llu, + 359163406191658253llu, 452517535812813007llu, 570136368817120201llu, + 718326812383316683llu, 905035071625626043llu, 1140272737634240411llu, + 1436653624766633509llu, 1810070143251252131llu, 2280545475268481167llu, + 2873307249533267101llu, 3620140286502504283llu, 4561090950536962147llu, + 5746614499066534157llu, 7240280573005008577llu, 9122181901073924329llu, + 11493228998133068689llu, 14480561146010017169llu, 18446744073709551557llu + }; + static constexpr size_t (* const mod_functions[])(size_t) = + { + &mod0, &mod2, &mod3, &mod5, &mod7, &mod11, &mod13, &mod17, &mod23, &mod29, &mod37, + &mod47, &mod59, &mod73, &mod97, &mod127, &mod151, &mod197, &mod251, &mod313, &mod397, + &mod499, &mod631, &mod797, &mod1009, &mod1259, &mod1597, &mod2011, &mod2539, &mod3203, + &mod4027, &mod5087, &mod6421, &mod8089, &mod10193, &mod12853, &mod16193, &mod20399, + &mod25717, &mod32401, &mod40823, &mod51437, &mod64811, &mod81649, 
&mod102877, + &mod129607, &mod163307, &mod205759, &mod259229, &mod326617, &mod411527, &mod518509, + &mod653267, &mod823117, &mod1037059, &mod1306601, &mod1646237, &mod2074129, + &mod2613229, &mod3292489, &mod4148279, &mod5226491, &mod6584983, &mod8296553, + &mod10453007, &mod13169977, &mod16593127, &mod20906033, &mod26339969, &mod33186281, + &mod41812097, &mod52679969, &mod66372617, &mod83624237, &mod105359939, &mod132745199, + &mod167248483, &mod210719881, &mod265490441, &mod334496971, &mod421439783, + &mod530980861, &mod668993977, &mod842879579, &mod1061961721, &mod1337987929, + &mod1685759167, &mod2123923447, &mod2675975881, &mod3371518343, &mod4247846927, + &mod5351951779, &mod6743036717, &mod8495693897, &mod10703903591, &mod13486073473, + &mod16991387857, &mod21407807219, &mod26972146961, &mod33982775741, &mod42815614441, + &mod53944293929, &mod67965551447, &mod85631228929, &mod107888587883, &mod135931102921, + &mod171262457903, &mod215777175787, &mod271862205833, &mod342524915839, + &mod431554351609, &mod543724411781, &mod685049831731, &mod863108703229, + &mod1087448823553, &mod1370099663459, &mod1726217406467, &mod2174897647073, + &mod2740199326961, &mod3452434812973, &mod4349795294267, &mod5480398654009, + &mod6904869625999, &mod8699590588571, &mod10960797308051, &mod13809739252051, + &mod17399181177241, &mod21921594616111, &mod27619478504183, &mod34798362354533, + &mod43843189232363, &mod55238957008387, &mod69596724709081, &mod87686378464759, + &mod110477914016779, &mod139193449418173, &mod175372756929481, &mod220955828033581, + &mod278386898836457, &mod350745513859007, &mod441911656067171, &mod556773797672909, + &mod701491027718027, &mod883823312134381, &mod1113547595345903, &mod1402982055436147, + &mod1767646624268779, &mod2227095190691797, &mod2805964110872297, &mod3535293248537579, + &mod4454190381383713, &mod5611928221744609, &mod7070586497075177, &mod8908380762767489, + &mod11223856443489329, &mod14141172994150357, &mod17816761525534927, + &mod22447712886978529, &mod28282345988300791, &mod35633523051069991, + &mod44895425773957261, &mod56564691976601587, &mod71267046102139967, + &mod89790851547914507, &mod113129383953203213, &mod142534092204280003, + &mod179581703095829107, &mod226258767906406483, &mod285068184408560057, + &mod359163406191658253, &mod452517535812813007, &mod570136368817120201, + &mod718326812383316683, &mod905035071625626043, &mod1140272737634240411, + &mod1436653624766633509, &mod1810070143251252131, &mod2280545475268481167, + &mod2873307249533267101, &mod3620140286502504283, &mod4561090950536962147, + &mod5746614499066534157, &mod7240280573005008577, &mod9122181901073924329, + &mod11493228998133068689, &mod14480561146010017169, &mod18446744073709551557 + }; + const size_t * found = std::lower_bound(std::begin(prime_list), std::end(prime_list) - 1, size); + size = *found; + return mod_functions[1 + found - prime_list]; + } + void commit(mod_function new_mod_function) + { + current_mod_function = new_mod_function; + } + void reset() + { + current_mod_function = &mod0; + } + + size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const + { + return current_mod_function(hash); + } + size_t keep_in_range(size_t index, size_t num_slots_minus_one) const + { + return index > num_slots_minus_one ? 
current_mod_function(index) : index;
+    }
+
+private:
+    mod_function current_mod_function = &mod0;
+};
+
+struct power_of_two_hash_policy
+{
+    size_t index_for_hash(size_t hash, size_t num_slots_minus_one) const
+    {
+        return hash & num_slots_minus_one;
+    }
+    size_t keep_in_range(size_t index, size_t num_slots_minus_one) const
+    {
+        return index_for_hash(index, num_slots_minus_one);
+    }
+    int8_t next_size_over(size_t & size) const
+    {
+        size = detailv3::next_power_of_two(size);
+        return 0;
+    }
+    void commit(int8_t)
+    {
+    }
+    void reset()
+    {
+    }
+
+};
+
+struct fibonacci_hash_policy
+{
+    size_t index_for_hash(size_t hash, size_t /*num_slots_minus_one*/) const
+    {
+        return (11400714819323198485ull * hash) >> shift;
+    }
+    size_t keep_in_range(size_t index, size_t num_slots_minus_one) const
+    {
+        return index & num_slots_minus_one;
+    }
+
+    int8_t next_size_over(size_t & size) const
+    {
+        size = std::max(size_t(2), detailv3::next_power_of_two(size));
+        return 64 - detailv3::log2(size);
+    }
+    void commit(int8_t shift)
+    {
+        this->shift = shift;
+    }
+    void reset()
+    {
+        shift = 63;
+    }
+
+private:
+    int8_t shift = 63;
+};
+
+template<typename K, typename V, typename H = std::hash<K>, typename E = std::equal_to<K>, typename A = std::allocator<std::pair<K, V> > >
+class flat_hash_map
+    : public detailv3::sherwood_v3_table
+    <
+        std::pair<K, V>,
+        K,
+        H,
+        detailv3::KeyOrValueHasher<K, std::pair<K, V>, H>,
+        E,
+        detailv3::KeyOrValueEquality<K, std::pair<K, V>, E>,
+        A,
+        typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<std::pair<K, V>>>
+    >
+{
+    using Table = detailv3::sherwood_v3_table
+    <
+        std::pair<K, V>,
+        K,
+        H,
+        detailv3::KeyOrValueHasher<K, std::pair<K, V>, H>,
+        E,
+        detailv3::KeyOrValueEquality<K, std::pair<K, V>, E>,
+        A,
+        typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<std::pair<K, V>>>
+    >;
+public:
+
+    using key_type = K;
+    using mapped_type = V;
+
+    using Table::Table;
+    flat_hash_map()
+    {
+    }
+
+    inline V & operator[](const K & key)
+    {
+        return emplace(key, convertible_to_value()).first->second;
+    }
+    inline V & operator[](K && key)
+    {
+        return emplace(std::move(key), convertible_to_value()).first->second;
+    }
+    V & at(const K & key)
+    {
+        auto found = this->find(key);
+        if (found == this->end())
+            throw std::out_of_range("Argument passed to at() was not in the map.");
+        return found->second;
+    }
+    const V & at(const K & key) const
+    {
+        auto found = this->find(key);
+        if (found == this->end())
+            throw std::out_of_range("Argument passed to at() was not in the map.");
+        return found->second;
+    }
+
+    using Table::emplace;
+    std::pair<typename Table::iterator, bool> emplace()
+    {
+        return emplace(key_type(), convertible_to_value());
+    }
+    template<typename M>
+    std::pair<typename Table::iterator, bool> insert_or_assign(const key_type & key, M && m)
+    {
+        auto emplace_result = emplace(key, std::forward<M>(m));
+        if (!emplace_result.second)
+            emplace_result.first->second = std::forward<M>(m);
+        return emplace_result;
+    }
+    template<typename M>
+    std::pair<typename Table::iterator, bool> insert_or_assign(key_type && key, M && m)
+    {
+        auto emplace_result = emplace(std::move(key), std::forward<M>(m));
+        if (!emplace_result.second)
+            emplace_result.first->second = std::forward<M>(m);
+        return emplace_result;
+    }
+    template<typename M>
+    typename Table::iterator insert_or_assign(typename Table::const_iterator, const key_type & key, M && m)
+    {
+        return insert_or_assign(key, std::forward<M>(m)).first;
+    }
+    template<typename M>
+    typename Table::iterator insert_or_assign(typename Table::const_iterator, key_type && key, M && m)
+    {
+        return insert_or_assign(std::move(key), std::forward<M>(m)).first;
+    }
+
+    friend bool operator==(const flat_hash_map & lhs, const flat_hash_map & rhs)
+    {
+        if (lhs.size() != rhs.size())
+            return false;
+        for (const typename Table::value_type & value : lhs)
+        {
+            auto found = rhs.find(value.first);
+            if (found == rhs.end())
+                return false;
+            else if (value.second != found->second)
+                return false;
+        }
+        return true;
+    }
+    friend bool operator!=(const flat_hash_map & lhs, const flat_hash_map & rhs)
+    {
+        return !(lhs == rhs);
+    }
+
+private:
+    struct convertible_to_value
+    {
+        operator V() const
+        {
+            return V();
+        }
+    };
+};
+
+template<typename T, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T> >
+class flat_hash_set
+    : public detailv3::sherwood_v3_table
+    <
+        T,
+        T,
+        H,
+        detailv3::functor_storage<size_t, H>,
+        E,
+        detailv3::functor_storage<bool, E>,
+        A,
+        typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<T>>
+    >
+{
+    using Table = detailv3::sherwood_v3_table
+    <
+        T,
+        T,
+        H,
+        detailv3::functor_storage<size_t, H>,
+        E,
+        detailv3::functor_storage<bool, E>,
+        A,
+        typename std::allocator_traits<A>::template rebind_alloc<detailv3::sherwood_v3_entry<T>>
+    >;
+public:
+
+    using key_type = T;
+
+    using Table::Table;
+    flat_hash_set()
+    {
+    }
+
+    template<typename... Args>
+    std::pair<typename Table::iterator, bool> emplace(Args &&... args)
+    {
+        return Table::emplace(T(std::forward<Args>(args)...));
+    }
+    std::pair<typename Table::iterator, bool> emplace(const key_type & arg)
+    {
+        return Table::emplace(arg);
+    }
+    std::pair<typename Table::iterator, bool> emplace(key_type & arg)
+    {
+        return Table::emplace(arg);
+    }
+    std::pair<typename Table::iterator, bool> emplace(const key_type && arg)
+    {
+        return Table::emplace(std::move(arg));
+    }
+    std::pair<typename Table::iterator, bool> emplace(key_type && arg)
+    {
+        return Table::emplace(std::move(arg));
+    }
+
+    friend bool operator==(const flat_hash_set & lhs, const flat_hash_set & rhs)
+    {
+        if (lhs.size() != rhs.size())
+            return false;
+        for (const T & value : lhs)
+        {
+            if (rhs.find(value) == rhs.end())
+                return false;
+        }
+        return true;
+    }
+    friend bool operator!=(const flat_hash_set & lhs, const flat_hash_set & rhs)
+    {
+        return !(lhs == rhs);
+    }
+};
+
+
+template<typename T>
+struct power_of_two_std_hash : std::hash<T>
+{
+    typedef ska::power_of_two_hash_policy hash_policy;
+};
+
+} // end namespace ska
+
+#ifdef FLAT_HASH_MAP_AMALGAM_MEM_REDUCTION
+#pragma pack(pop)
+#endif
diff --git a/src/3rd_party/swiftdtoa/SwiftDtoa.cpp b/src/3rd_party/swiftdtoa/SwiftDtoa.cpp
new file mode 100644
index 00000000..7f2c1ffe
--- /dev/null
+++ b/src/3rd_party/swiftdtoa/SwiftDtoa.cpp
@@ -0,0 +1,2763 @@
+//===--- SwiftDtoa.c ---------------------------------------------*- c -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2018-2020 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===---------------------------------------------------------------------===//
+//
+// Note: This source file is used in different projects where it gets
+// compiled variously as ".c" or ".cpp". Please keep the code clean
+// portable C so others can share your improvements.
+//
+/// For binary16, this uses a simple approach that is normally
+/// implemented with variable-length arithmetic. However, due to
+/// the limited range of binary16, this can be implemented simply
+/// with only 32-bit integer arithmetic.
+///
+/// For other formats, SwiftDtoa uses a modified form of the Grisu2
+/// algorithm from Florian Loitsch; "Printing Floating-Point Numbers
+/// Quickly and Accurately with Integers", 2010.
+/// https://doi.org/10.1145/1806596.1806623
+///
+/// Some of the Grisu2 modifications were suggested by the "Errol
+/// paper": Marc Andrysco, Ranjit Jhala, Sorin Lerner; "Printing
+/// Floating-Point Numbers: A Faster, Always Correct Method", 2016.
+/// https://doi.org/10.1145/2837614.2837654
+/// In particular, the Errol paper explored the impact of higher-precision
+/// fixed-width arithmetic on Grisu2 and showed a way to rapidly test
+/// the correctness of such algorithms.
+///
+/// A few further improvements were inspired by the Ryu algorithm
+/// from Ulf Adams; "Ryū: fast float-to-string conversion", 2018.
+/// https://doi.org/10.1145/3296979.3192369
+///
+/// In summary, this implementation is:
+///
+/// * Fast. It uses only fixed-width integer arithmetic and has
+///   constant memory requirements. For double-precision values on
+///   64-bit processors, it is competitive with Ryu. For double-precision
+///   values on 32-bit processors, and higher-precision values on all
+///   processors, it is considerably faster.
+///
+/// * Always Accurate. Converting the decimal form back to binary
+///   will always yield exactly the same value. For the IEEE 754
+///   formats, the round-trip will produce exactly the same bit
+///   pattern in memory.
+///
+/// * Always Short. This always selects an accurate result with the
+///   minimum number of significant digits.
+///
+/// * Always Close. Among all accurate, short results, this always
+///   chooses the result that is closest to the exact floating-point
+///   value. (In case of an exact tie, it rounds the last digit even.)
+///
+/// * Portable. The code is written in portable C99. The core
+///   implementations utilize only fixed-size integer arithmetic.
+///   128-bit integer support is utilized if present but is not
+///   necessary. There are thin wrappers that accept platform-native
+///   floating point types and delegate to the portable core
+///   functions.
+///
+// ----------------------------------------------------------------------------
+
+#pragma warning(disable: 4244)
+#pragma warning(disable: 4319)
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "SwiftDtoa.h"
+
+#if defined(__SIZEOF_INT128__)
+  // We get a significant speed boost if we can use the __uint128_t
+  // type that's present in GCC and Clang on 64-bit architectures. In
+  // particular, we can do 128-bit arithmetic directly and can
+  // represent 256-bit integers as collections of 64-bit elements.
+  #define HAVE_UINT128_T 1
+#else
+  // On 32-bit, we use slower code that manipulates 128-bit
+  // and 256-bit integers as collections of 32-bit elements.
+  #define HAVE_UINT128_T 0
+#endif
+
+//
+// Predefine various arithmetic helpers. Most implementations and extensive
+// comments are at the bottom of this file.
+//
+
+#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
+// The power-of-10 tables do not directly store the associated binary
+// exponent. That's because the binary exponent is a simple linear
+// function of the decimal power (and vice versa), so it's just as
+// fast (and uses much less memory) to compute it:
+
+// The binary exponent corresponding to a particular power of 10.
+// This matches the power-of-10 tables across the full range of binary128.
+#define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1))
+
+// A decimal exponent that approximates a particular binary power.
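// (As a quick check on the two constants used here: 55732705 / 2^24 is approximately
// 3.32193, i.e. log2(10), and 20201781 / 2^26 is approximately 0.30103, i.e. log10(2),
// so each macro is simply a cheap fixed-point approximation of the corresponding
// logarithm.)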
+#define decimalExponentFor2ToThe(e) ((int)(((int64_t)e * 20201781) >> 26)) +#endif + +// +// Helper functions used only by the single-precision binary32 formatter +// + +#if SWIFT_DTOA_BINARY32_SUPPORT +static uint64_t multiply64x32RoundingDown(uint64_t lhs, uint32_t rhs) { + static const uint64_t mask32 = UINT32_MAX; + uint64_t t = ((lhs & mask32) * rhs) >> 32; + return t + (lhs >> 32) * rhs; +} +static uint64_t multiply64x32RoundingUp(uint64_t lhs, uint32_t rhs) { + static const uint64_t mask32 = UINT32_MAX; + uint64_t t = (((lhs & mask32) * rhs) + mask32) >> 32; + return t + (lhs >> 32) * rhs; +} +static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent); +#endif + +// +// Helpers used by binary32, binary64, float80, and binary128 +// + +#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +#if HAVE_UINT128_T +typedef __uint128_t swift_uint128_t; +#define initialize128WithHighLow64(dest, high64, low64) ((dest) = ((__uint128_t)(high64) << 64) | (low64)) +#define shiftLeft128(u128, shift) (*(u128) <<= shift) +#else +typedef struct { + uint32_t low, b, c, high; +} swift_uint128_t; +#define initialize128WithHighLow64(dest, high64, low64) \ + ((dest).low = (uint32_t)(low64), \ + (dest).b = (uint32_t)((low64) >> 32), \ + (dest).c = (uint32_t)(high64), \ + (dest).high = (uint32_t)((high64) >> 32)) +static void shiftLeft128(swift_uint128_t *, int shift); +#endif +inline static int finishFormatting(char *, size_t, char *, char *, int, int); +#endif + + +// +// Helper functions needed by the binary64 formatter. +// + +#if SWIFT_DTOA_BINARY64_SUPPORT +#if HAVE_UINT128_T +#define isLessThan128x128(lhs, rhs) ((lhs) < (rhs)) +#define subtract128x128(lhs, rhs) (*(lhs) -= (rhs)) +#define multiply128xu32(lhs, rhs) (*(lhs) *= (rhs)) +#define initialize128WithHigh64(dest, value) ((dest) = (__uint128_t)(value) << 64) +#define extractHigh64From128(arg) ((uint64_t)((arg) >> 64)) +#define is128bitZero(arg) ((arg) == 0) +static int extractIntegerPart128(__uint128_t *fixed128, int integerBits) { + const int fractionBits = 128 - integerBits; + int integerPart = (int)(*fixed128 >> fractionBits); + const swift_uint128_t fixedPointMask = (((__uint128_t)1 << fractionBits) - 1); + *fixed128 &= fixedPointMask; + return integerPart; +} +#define shiftRightRoundingDown128(val, shift) ((val) >> (shift)) +#define shiftRightRoundingUp128(val, shift) (((val) + (((uint64_t)1 << (shift)) - 1)) >> (shift)) + +#else + +static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs); +static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs); +static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs); +#define initialize128WithHigh64(dest, value) \ + ((dest).low = (dest).b = 0, \ + (dest).c = (uint32_t)(value), \ + (dest).high = (uint32_t)((value) >> 32)) +#define extractHigh64From128(arg) (((uint64_t)(arg).high << 32)|((arg).c)) +#define is128bitZero(dest) \ + (((dest).low | (dest).b | (dest).c | (dest).high) == 0) +// Treat a uint128_t as a fixed-point value with `integerBits` bits in +// the integer portion. Return the integer portion and zero it out. 
+static int extractIntegerPart128(swift_uint128_t *fixed128, int integerBits) { + const int highFractionBits = 32 - integerBits; + int integerPart = (int)(fixed128->high >> highFractionBits); + fixed128->high &= ((uint32_t)1 << highFractionBits) - 1; + return integerPart; +} +static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift); +static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift); +#endif +static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs); +static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs); +static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent); +#endif + +// +// Helper functions used by the 256-bit backend needed for +// float80 and binary128 +// + +#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +#if HAVE_UINT128_T +// A 256-bit unsigned integer type stored as 3 64-bit words +typedef struct {uint64_t low, midlow, midhigh, high;} swift_uint256_t; +#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ + ((dest).low = (low64), \ + (dest).midlow = (midlow64), \ + (dest).midhigh = (midhigh64), \ + (dest).high = (high64)) +#define is256bitZero(dest) \ + (((dest).low | (dest).midlow | (dest).midhigh | (dest).high) == 0) +static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { + int integerPart = (int)(fixed256->high >> (64 - integerBits)); + const uint64_t fixedPointMask = (((uint64_t)1 << (64 - integerBits)) - 1); + fixed256->high &= fixedPointMask; + return integerPart; +} +#else +// A 256-bit unsigned integer type stored as 8 32-bit words +typedef struct { uint32_t elt[8]; } swift_uint256_t; // [0]=low, [7]=high +#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ + ((dest).elt[0] = (uint64_t)(low64), \ + (dest).elt[1] = (uint64_t)(low64) >> 32, \ + (dest).elt[2] = (uint64_t)(midlow64), \ + (dest).elt[3] = (uint64_t)(midlow64) >> 32, \ + (dest).elt[4] = (uint64_t)(midhigh64), \ + (dest).elt[5] = (uint64_t)(midhigh64) >> 32, \ + (dest).elt[6] = (uint64_t)(high64), \ + (dest).elt[7] = (uint64_t)(high64) >> 32) +#define is256bitZero(dest) \ + (((dest).elt[0] | (dest).elt[1] | (dest).elt[2] | (dest).elt[3] \ +| (dest).elt[4] | (dest).elt[5] | (dest).elt[6] | (dest).elt[7]) == 0) +static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { + int integerPart = (int)(fixed256->elt[7] >> (32 - integerBits)); + const uint64_t fixedPointMask = (((uint64_t)1 << (32 - integerBits)) - 1); + fixed256->elt[7] &= fixedPointMask; + return integerPart; +} +#endif +static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs); +// Multiply a 256-bit fraction times a 128-bit fraction, with controlled rounding +static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs); +static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs); +static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs); +static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs); +static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift); +static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift); +static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent); +static size_t _swift_dtoa_256bit_backend(char *, size_t, swift_uint128_t, swift_uint128_t, int, int, int, int, bool); +#endif + + +// 
A table of all two-digit decimal numbers +#if SWIFT_DTOA_BINARY16_SUPPORT || SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +static const char asciiDigitTable[] = + "0001020304050607080910111213141516171819" + "2021222324252627282930313233343536373839" + "4041424344454647484950515253545556575859" + "6061626364656667686970717273747576777879" + "8081828384858687888990919293949596979899"; +#endif + +// ================================================================ +// +// Helpers to output formatted results for infinity, zero, and NaN +// +// ================================================================ + +static size_t infinity(char *dest, size_t len, int negative) { + if (negative) { + if (len >= 5) { + memcpy(dest, "-inf", 5); + return 4; + } + } else { + if (len >= 4) { + memcpy(dest, "inf", 4); + return 3; + } + } + if (len > 0) { + dest[0] = '\0'; + } + return 0; +} + +static size_t zero(char *dest, size_t len, int negative) { + if (negative) { + if (len >= 5) { + memcpy(dest, "-0", 3); + return 2; + } + } else { + if (len >= 4) { + memcpy(dest, "0", 2); + return 1; + } + } + if (len > 0) { + dest[0] = '\0'; + } + return 0; +} + +static size_t nan_details(char *dest, size_t len, int negative, int quiet, uint64_t payloadHigh, uint64_t payloadLow) { + const char *sign = negative ? "-" : ""; + const char *signalingChar = quiet ? "" : "s"; + char buff[64]; + if (payloadLow != 0) { + if (payloadHigh != 0) { + snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 "%016" PRIx64 ")", + sign, signalingChar, payloadHigh, payloadLow); + } else { + snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 ")", + sign, signalingChar, payloadLow); + } + } else { + snprintf(buff, sizeof(buff), "%s%snan", + sign, signalingChar); + } + size_t nanlen = strlen(buff); + if (nanlen < len) { + memcpy(dest, buff, nanlen + 1); + return nanlen; + } + if (len > 0) { + dest[0] = '\0'; + } + return 0; +} + + +// ================================================================ +// +// BINARY16 +// +// ================================================================ + + +#if SWIFT_DTOA_BINARY16_SUPPORT +// Format an IEEE 754 binary16 half-precision floating point value +// into an optimal text form. + +// This does not assume that the C environment has any support +// for binary16. + +// Because binary16 has such a limited range, a simple exact +// implementation can fit in 32 bit arithmetic. Since we can easily +// verify every single binary16 value, this can be experimentally +// optimized. 
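// (For scale: the largest finite binary16 value is 65504, and an exact round trip never
// needs more than 5 significant decimal digits, which is why 32-bit intermediates are
// sufficient throughout this formatter.)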
+size_t swift_dtoa_optimal_binary16_p(const void *f, char *dest, size_t length) { + static const int significandBitCount = 10; + static const uint32_t significandMask + = ((uint32_t)1 << significandBitCount) - 1; + static const int exponentBitCount = 5; + static const int exponentMask = (1 << exponentBitCount) - 1; + // See comments in swift_dtoa_optimal_binary64_p + static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14 + + if (length < 1) { + return 0; + } + + // Step 0: Deconstruct IEEE 754 binary16 format + uint16_t raw = *(const uint16_t *)f; + int exponentBitPattern = (raw >> significandBitCount) & exponentMask; + uint16_t significandBitPattern = raw & significandMask; + int negative = raw >> 15; + + // Step 1: Handle the various input cases: + int binaryExponent; + uint16_t significand; + int isBoundary = significandBitPattern == 0; + if (exponentBitPattern == exponentMask) { // NaN or Infinity + if (isBoundary) { // Infinity + return infinity(dest, length, negative); + } else { + const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; + uint16_t payload = significandBitPattern & ((1U << (significandBitCount - 2)) - 1); + return nan_details(dest, length, negative, quiet, 0, payload); + } + } else if (exponentBitPattern == 0) { + if (isBoundary) { // Zero + return zero(dest, length, negative); + } else { // Subnormal + binaryExponent = 1 - exponentBias; + significand = significandBitPattern; + } + } else { // normal + binaryExponent = exponentBitPattern - exponentBias; + uint16_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; + uint16_t fullSignificand = significandBitPattern + hiddenBit; + significand = fullSignificand; + } + + // Step 2: Determine the exact target interval + significand <<= 2; + static const uint16_t halfUlp = 2; + uint32_t upperMidpointExact = significand + halfUlp; + + static const uint16_t quarterUlp = 1; + uint32_t lowerMidpointExact + = significand - (isBoundary ? quarterUlp : halfUlp); + + // Shortest output from here is "1.0" plus null byte + if (length < 4) { + dest[0] = '\0'; + return 0; + } + + char *p = dest; + if (negative) { + *p++ = '-'; + } + + if (binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38)) { + // Format values < 10^-5 as exponential form + // We know value < 10^-5, so we can do the first scaling step unconditionally + int decimalExponent = -5; + uint32_t u = (upperMidpointExact << (28 - 13 + binaryExponent)) * 100000; + uint32_t l = (lowerMidpointExact << (28 - 13 + binaryExponent)) * 100000; + uint32_t t = (significand << (28 - 13 + binaryExponent)) * 100000; + const uint32_t mask = (1 << 28) - 1; + if (t < ((1 << 28) / 10)) { + u *= 100; l *= 100; t *= 100; + decimalExponent -= 2; + } + if (t < (1 << 28)) { + u *= 10; l *= 10; t *= 10; + decimalExponent -= 1; + } + const int uDigit = u >> 28, lDigit = l >> 28; + if (uDigit == lDigit) { + // There's more than one digit, emit a '.' 
and the rest + if (p > dest + length - 6) { + dest[0] = '\0'; + return 0; + } + *p++ = (t >> 28) + '0'; + *p++ = '.'; + while (true) { + u = (u & mask) * 10; l = (l & mask) * 10; + const int uDigit = u >> 28, lDigit = l >> 28; + if (uDigit != lDigit) { + t = (t & mask) * 10; + break; + } + t *= 10; + *p++ = uDigit + '0'; + } + } + t = (t + (1 << 27)) >> 28; // Add 1/2 to round + if (p > dest + length - 6) { // Exactly 6 bytes written below + dest[0] = '\0'; + return 0; + } + *p++ = t + '0'; + memcpy(p, "e-", 2); + p += 2; + memcpy(p, asciiDigitTable + (-decimalExponent) * 2, 2); + p += 2; + *p = '\0'; + return p - dest; + } + + // Format the value using decimal format + + // There's an integer portion of no more than 5 digits + int intportion; + if (binaryExponent < 13) { + intportion = significand >> (13 - binaryExponent); + significand -= intportion << (13 - binaryExponent); + } else { + intportion = significand << (binaryExponent - 13); + significand -= intportion >> (binaryExponent - 13); + } + if (intportion < 10) { + if (p > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + *p++ = intportion + '0'; // One digit is the most common case + } else if (intportion < 1000) { + // 2 or 3 digits + if (p > dest + length - 4) { + dest[0] = '\0'; + return 0; + } + if (intportion > 99) { + *p++ = intportion / 100 + '0'; + } + memcpy(p, asciiDigitTable + (intportion % 100) * 2, 2); + p += 2; + } else { + // 4 or 5 digits + if (p > dest + length - 6) { + dest[0] = '\0'; + return 0; + } + if (intportion > 9999) { + *p++ = intportion / 10000 + '0'; + intportion %= 10000; + } + memcpy(p, asciiDigitTable + (intportion / 100) * 2, 2); + memcpy(p + 2, asciiDigitTable + (intportion % 100) * 2, 2); + p += 4; + } + if (p > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + + if (significand == 0) { // No fraction, so we're done. + *p = '\0'; + return p - dest; + } + + *p++ = '.'; + + // Format the fractional part + uint32_t u = upperMidpointExact << (28 - 13 + binaryExponent); + uint32_t l = lowerMidpointExact << (28 - 13 + binaryExponent); + uint32_t t = significand << (28 - 13 + binaryExponent); + const uint32_t mask = (1 << 28) - 1; + unsigned uDigit, lDigit; + while (true) { + u = (u & mask) * 10; l = (l & mask) * 10; + uDigit = u >> 28; lDigit = l >> 28; + if (uDigit != lDigit) { + t = (t & mask) * 10; + break; + } + t *= 10; + if (p > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + *p++ = uDigit + '0'; + } + t += 1 << 27; // Add 1/2 + if ((t & mask) == 0) { // Was exactly 1/2 (now zero) + t = (t >> 28) & ~1; // Round even + } else { + t >>= 28; + } + if (t <= lDigit && l > 0) + t += 1; + *p++ = t + '0'; + *p = '\0'; + return p - dest; +} +#endif + +// ================================================================ +// +// BINARY32 +// +// ================================================================ + + +#if SWIFT_DTOA_BINARY32_SUPPORT +#if FLOAT_IS_BINARY32 +// Format a C `float` +size_t swift_dtoa_optimal_float(float d, char *dest, size_t length) { + return swift_dtoa_optimal_binary32_p(&d, dest, length); +} +#endif + +// Format an IEEE 754 single-precision binary32 format floating-point number. 
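// (An exact binary32 round trip needs at most 9 significant decimal digits. As an
// illustrative sketch only, assuming a platform where C `float` is IEEE 754 binary32,
// a caller might use the wrapper above like this:
//
//     char buf[32];
//     size_t n = swift_dtoa_optimal_float(1.5f, buf, sizeof(buf));
//     // buf now holds "1.5" and n == 3; if the buffer is too small, these
//     // functions store "" and return 0.
// )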
+size_t swift_dtoa_optimal_binary32_p(const void *f, char *dest, size_t length) +{ + static const int significandBitCount = FLT_MANT_DIG - 1; + static const uint32_t significandMask + = ((uint32_t)1 << significandBitCount) - 1; + static const int exponentBitCount = 8; + static const int exponentMask = (1 << exponentBitCount) - 1; + // See comments in swift_dtoa_optimal_binary64_p + static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 125 + + // Step 0: Deconstruct the target number + // Note: this strongly assumes IEEE 754 binary32 format + uint32_t raw = *(const uint32_t *)f; + int exponentBitPattern = (raw >> significandBitCount) & exponentMask; + uint32_t significandBitPattern = raw & significandMask; + int negative = raw >> 31; + + // Step 1: Handle the various input cases: + int binaryExponent; + uint32_t significand; + if (length < 1) { + return 0; + } else if (exponentBitPattern == exponentMask) { // NaN or Infinity + if (significandBitPattern == 0) { // Infinity + return infinity(dest, length, negative); + } else { // NaN + const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; + uint32_t payload = raw & ((1UL << (significandBitCount - 2)) - 1); + return nan_details(dest, length, negative, quiet != 0, 0, payload); + } + } else if (exponentBitPattern == 0) { + if (significandBitPattern == 0) { // Zero + return zero(dest, length, negative); + } else { // Subnormal + binaryExponent = 1 - exponentBias; + significand = significandBitPattern << (32 - significandBitCount - 1); + } + } else { // normal + binaryExponent = exponentBitPattern - exponentBias; + uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; + uint32_t fullSignificand = significandBitPattern + hiddenBit; + significand = fullSignificand << (32 - significandBitCount - 1); + } + + // Step 2: Determine the exact unscaled target interval + static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2); + uint64_t upperMidpointExact = (uint64_t)(significand + halfUlp); + + int isBoundary = significandBitPattern == 0; + static const uint32_t quarterUlp = halfUlp >> 1; + uint64_t lowerMidpointExact + = (uint64_t)(significand - (isBoundary ? 
quarterUlp : halfUlp)); + + // Step 3: Estimate the base 10 exponent + int base10Exponent = decimalExponentFor2ToThe(binaryExponent); + + // Step 4: Compute a power-of-10 scale factor + uint64_t powerOfTenRoundedDown = 0; + uint64_t powerOfTenRoundedUp = 0; + int powerOfTenExponent = 0; + static const int bulkFirstDigits = 1; + intervalContainingPowerOf10_Binary32(-base10Exponent + bulkFirstDigits - 1, + &powerOfTenRoundedDown, + &powerOfTenRoundedUp, + &powerOfTenExponent); + const int extraBits = binaryExponent + powerOfTenExponent; + + // Step 5: Scale the interval (with rounding) + static const int integerBits = 8; + const int shift = integerBits - extraBits; + const int roundUpBias = (1 << shift) - 1; + static const int fractionBits = 64 - integerBits; + static const uint64_t fractionMask = ((uint64_t)1 << fractionBits) - (uint64_t)1; + uint64_t u, l; + if (significandBitPattern & 1) { + // Narrow the interval (odd significand) + uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown, + upperMidpointExact); + u = u1 >> shift; // Rounding down + + uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp, + lowerMidpointExact); + l = (l1 + roundUpBias) >> shift; // Rounding Up + } else { + // Widen the interval (even significand) + uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp, + upperMidpointExact); + u = (u1 + roundUpBias) >> shift; // Rounding Up + + uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown, + lowerMidpointExact); + l = l1 >> shift; // Rounding down + } + + // Step 6: Align first digit, adjust exponent + // In particular, this prunes leading zeros from subnormals + uint64_t t = u; + uint64_t delta = u - l; + while (t < (uint64_t)1 << fractionBits) { + base10Exponent -= 1; + t *= 10; + delta *= 10; + } + + // Step 7: Generate decimal digits into the destination buffer + char *p = dest; + if (p > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + if (negative) { + *p++ = '-'; + } + char * const firstOutputChar = p; + // Format first digit as a 2-digit value to get a leading '0' + memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); + t &= fractionMask; + p += 2; + + // Emit two digits at a time + while ((delta * 10) < ((t * 10) & fractionMask)) { + if (p > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + delta *= 100; + t *= 100; + memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); + t &= fractionMask; + p += 2; + } + + // Emit any final digit + if (delta < t) { + if (p > dest + length - 2) { + dest[0] = '\0'; + return 0; + } + delta *= 10; + t *= 10; + *p++ = '0' + (t >> fractionBits); + t &= fractionMask; + } + + // Adjust the final digit to be closer to the original value + if (delta > t + ((uint64_t)1 << fractionBits)) { + uint64_t skew; + if (isBoundary) { + skew = delta - delta / 3 - t; + } else { + skew = delta / 2 - t; + } + uint64_t one = (uint64_t)(1) << (64 - integerBits); + uint64_t lastAccurateBit = 1ULL << 24; + uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1); + uint64_t oneHalf = one >> 1; + if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) { + // If the skew is exactly integer + 1/2, round the last + // digit even after adjustment + int adjust = (int)(skew >> (64 - integerBits)); + p[-1] -= adjust; + p[-1] &= ~1; + } else { + // Else round to nearest... 
+ int adjust = (int)((skew + oneHalf) >> (64 - integerBits)); + p[-1] -= adjust; + } + } + + int forceExponential = binaryExponent > 25 || (binaryExponent == 25 && !isBoundary); + return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); +} +#endif + + +// ================================================================ +// +// BINARY64 +// +// ================================================================ + +#if SWIFT_DTOA_BINARY64_SUPPORT +#if LONG_DOUBLE_IS_BINARY64 +size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { + return swift_dtoa_optimal_binary64_p(&d, dest, length); +} +#endif +#if DOUBLE_IS_BINARY64 +size_t swift_dtoa_optimal_double(double d, char *dest, size_t length) { + return swift_dtoa_optimal_binary64_p(&d, dest, length); +} +#endif + +// Format an IEEE 754 double-precision binary64 format floating-point number. + +// The calling convention here assumes that C `double` is this format, +// but otherwise, this does not utilize any floating-point arithmetic +// or library routines. +size_t swift_dtoa_optimal_binary64_p(const void *d, char *dest, size_t length) +{ + // Bits in raw significand (not including hidden bit, if present) + static const int significandBitCount = DBL_MANT_DIG - 1; + static const uint64_t significandMask + = ((uint64_t)1 << significandBitCount) - 1; + // Bits in raw exponent + static const int exponentBitCount = 11; + static const int exponentMask = (1 << exponentBitCount) - 1; + // Note: IEEE 754 conventionally uses 1023 as the exponent + // bias. That's because they treat the significand as a + // fixed-point number with one bit (the hidden bit) integer + // portion. The logic here reconstructs the significand as a + // pure fraction, so we need to accomodate that when + // reconstructing the binary exponent. + static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 1022 + + // Step 0: Deconstruct an IEEE 754 binary64 double-precision value + uint64_t raw = *(const uint64_t *)d; + int exponentBitPattern = (raw >> significandBitCount) & exponentMask; + uint64_t significandBitPattern = raw & significandMask; + int negative = raw >> 63; + + // Step 1: Handle the various input cases: + if (length < 1) { + return 0; + } + int binaryExponent; + int isBoundary = significandBitPattern == 0; + uint64_t significand; + if (exponentBitPattern == exponentMask) { // NaN or Infinity + if (isBoundary) { // Infinity + return infinity(dest, length, negative); + } else { + const int quiet = (raw >> (significandBitCount - 1)) & 1; + uint64_t payload = raw & ((1ull << (significandBitCount - 2)) - 1); + return nan_details(dest, length, negative, quiet, 0, payload); + } + } else if (exponentBitPattern == 0) { + if (isBoundary) { // Zero + return zero(dest, length, negative); + } else { // subnormal + binaryExponent = 1 - exponentBias; + significand = significandBitPattern + << (64 - significandBitCount - 1); + } + } else { // normal + binaryExponent = exponentBitPattern - exponentBias; + uint64_t hiddenBit = (uint64_t)1 << significandBitCount; + uint64_t fullSignificand = significandBitPattern + hiddenBit; + significand = fullSignificand << (64 - significandBitCount - 1); + } + + // Step 2: Determine the exact unscaled target interval + + // Grisu-style algorithms construct the shortest decimal digit + // sequence within a specific interval. To build the appropriate + // interval, we start by computing the midpoints between this + // floating-point value and the adjacent ones. 
Note that this + // step is an exact computation. + + uint64_t halfUlp = (uint64_t)1 << (64 - significandBitCount - 2); + uint64_t quarterUlp = halfUlp >> 1; + uint64_t upperMidpointExact = significand + halfUlp; + + uint64_t lowerMidpointExact + = significand - (isBoundary ? quarterUlp : halfUlp); + + int isOddSignificand = (significandBitPattern & 1) != 0; + + // Step 3: Estimate the base 10 exponent + + // Grisu algorithms are based in part on a simple technique for + // generating a base-10 form for a binary floating-point number. + // Start with a binary floating-point number `f * 2^e` and then + // estimate the decimal exponent `p`. You can then rewrite your + // original number as: + // + // ``` + // f * 2^e * 10^-p * 10^p + // ``` + // + // The last term is part of our output, and a good estimate for + // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying + // the first three terms then yields a fraction suitable for + // producing the decimal digits. Here we use a very fast estimate + // of `p` that is never off by more than 1; we'll have + // opportunities later to correct any error. + + int base10Exponent = decimalExponentFor2ToThe(binaryExponent); + + // Step 4: Compute a power-of-10 scale factor + + // Compute `10^-p` to 128-bit precision. We generate + // both over- and under-estimates to ensure we can exactly + // bound the later use of these values. + swift_uint128_t powerOfTenRoundedDown; + swift_uint128_t powerOfTenRoundedUp; + int powerOfTenExponent = 0; + static const int bulkFirstDigits = 7; + static const int bulkFirstDigitFactor = 1000000; // 10^(bulkFirstDigits - 1) + // Note the extra factor of 10^bulkFirstDigits -- that will give + // us a headstart on digit generation later on. (In contrast, Ryu + // uses an extra factor of 10^17 here to get all the digits up + // front, but then has to back out any extra digits. Doing that + // with a 17-digit value requires 64-bit division, which is the + // root cause of Ryu's poor performance on 32-bit processors. We + // also might have to back out extra digits if 7 is too many, but + // will only need 32-bit division in that case.) + intervalContainingPowerOf10_Binary64(-base10Exponent + bulkFirstDigits - 1, + &powerOfTenRoundedDown, + &powerOfTenRoundedUp, + &powerOfTenExponent); + const int extraBits = binaryExponent + powerOfTenExponent; + + // Step 5: Scale the interval (with rounding) + + // As mentioned above, the final digit generation works + // with an interval, so we actually apply the scaling + // to the upper and lower midpoint values separately. + + // As part of the scaling here, we'll switch from a pure + // fraction with zero bit integer portion and 128-bit fraction + // to a fixed-point form with 32 bits in the integer portion. + static const int integerBits = 32; + + // We scale the interval in one of two different ways, + // depending on whether the significand is even or odd... + + swift_uint128_t u, l; + if (isOddSignificand) { + // Case A: Narrow the interval (odd significand) + + // Loitsch' original Grisu2 always rounds so as to narrow the + // interval. Since our digit generation will select a value + // within the scaled interval, narrowing the interval + // guarantees that we will find a digit sequence that converts + // back to the original value. + + // This ensures accuracy but, as explained in Loitsch' paper, + // this carries a risk that there will be a shorter digit + // sequence outside of our narrowed interval that we will + // miss. 
This risk obviously gets lower with increased + // precision, but it wasn't until the Errol paper that anyone + // had a good way to test whether a particular implementation + // had sufficient precision. That paper shows a way to enumerate + // the worst-case numbers; those numbers that are extremely close + // to the mid-points between adjacent floating-point values. + // These are the values that might sit just outside of the + // narrowed interval. By testing these values, we can verify + // the correctness of our implementation. + + // Multiply out the upper midpoint, rounding down... + swift_uint128_t u1 = multiply128x64RoundingDown(powerOfTenRoundedDown, + upperMidpointExact); + // Account for residual binary exponent and adjust + // to the fixed-point format + u = shiftRightRoundingDown128(u1, integerBits - extraBits); + + // Conversely for the lower midpoint... + swift_uint128_t l1 = multiply128x64RoundingUp(powerOfTenRoundedUp, + lowerMidpointExact); + l = shiftRightRoundingUp128(l1, integerBits - extraBits); + + } else { + // Case B: Widen the interval (even significand) + + // As explained in Errol Theorem 6, in certain cases there is + // a short decimal representation at the exact boundary of the + // scaled interval. When such a number is converted back to + // binary, it will get rounded to the adjacent even + // significand. + + // So when the significand is even, we round so as to widen + // the interval in order to ensure that the exact midpoints + // are considered. Of couse, this ensures that we find a + // short result but carries a risk of selecting a result + // outside of the exact scaled interval (which would be + // inaccurate). + + // The same testing approach described above (based on results + // in the Errol paper) also applies + // to this case. + + swift_uint128_t u1 = multiply128x64RoundingUp(powerOfTenRoundedUp, + upperMidpointExact); + u = shiftRightRoundingUp128(u1, integerBits - extraBits); + + swift_uint128_t l1 = multiply128x64RoundingDown(powerOfTenRoundedDown, + lowerMidpointExact); + l = shiftRightRoundingDown128(l1, integerBits - extraBits); + } + + // Step 6: Align first digit, adjust exponent + + // Calculations above used an estimate for the power-of-ten scale. + // Here, we compensate for any error in that estimate by testing + // whether we have the expected number of digits in the integer + // portion and correcting as necesssary. This also serves to + // prune leading zeros from subnormals. + + // Except for subnormals, this loop should never run more than once. + // For subnormals, this might run as many as 16 + bulkFirstDigits + // times. +#if HAVE_UINT128_T + while (u < ((__uint128_t)bulkFirstDigitFactor << (128 - integerBits))) +#else + while (u.high < ((uint32_t)bulkFirstDigitFactor << (32 - integerBits))) +#endif + { + base10Exponent -= 1; + multiply128xu32(&l, 10); + multiply128xu32(&u, 10); + } + + // Step 7: Produce decimal digits + + // One standard approach generates digits for the scaled upper and + // lower boundaries and stops when at the first digit that + // differs. For example, note that 0.1234 is the shortest decimal + // between u = 0.123456 and l = 0.123345. + + // Grisu optimizes this by generating digits for the upper bound + // (multiplying by 10 to isolate each digit) while simultaneously + // scaling the interval width `delta`. As we remove each digit + // from the upper bound, the remainder is the difference between + // the base-10 value generated so far and the true upper bound. 
+ // When that remainder is less than the scaled width of the + // interval, we know the current digits specify a value within the + // target interval. + + // The logic below actually blends three different digit-generation + // strategies: + // * The first digits are already in the integer portion of the + // fixed-point value, thanks to the `bulkFirstDigits` factor above. + // We can just break those down and write them out. + // * If we generated too many digits, we use a Ryu-inspired technique + // to backtrack. + // * If we generated too few digits (the usual case), we use an + // optimized form of the Grisu2 method to produce the remaining + // values. + + // Generate digits for `t` with interval width `delta = u - l` + swift_uint128_t t = u; + swift_uint128_t delta = u; + subtract128x128(&delta, l); + + char *p = dest; + if (negative) { + if (p >= dest + length) { + dest[0] = '\0'; + return 0; + } + *p++ = '-'; + } + char * const firstOutputChar = p; + + // The `bulkFirstDigits` adjustment above already set up the first 7 digits + // Format as 8 digits (with a leading zero that we'll exploit later on). + uint32_t d12345678 = extractIntegerPart128(&t, integerBits); + + if (!isLessThan128x128(delta, t)) { + // Oops! We have too many digits. Back out the extra ones to + // get the right answer. This is similar to Ryu, but since + // we've only produced seven digits, we only need 32-bit + // arithmetic here. A few notes: + // * Our target hardware always supports 32-bit hardware division, + // so this should be reasonably fast. + // * For small integers (like "2"), Ryu would have to back out 16 + // digits; we only have to back out 6. + // * Very few double-precision values actually need fewer than 7 + // digits. So this is rarely used except in workloads that + // specifically use double for small integers. This is more + // common for binary32, of course. + + // TODO: Add benchmarking for "small integers" -1000...1000 to + // verify that this does not unduly penalize those values. + + // Why this is critical for performance: In order to use the + // 8-digits-at-a-time optimization below, we need at least 30 + // bits in the integer part of our fixed-point format above. If + // we only use bulkDigits = 1, that leaves only 128 - 30 = 98 + // bit accuracy for our scaling step, which isn't enough + // (binary64 needs ~110 bits for correctness). So we have to + // use a large bulkDigits value to make full use of the 128-bit + // scaling above, which forces us to have some form of logic to + // handle the case of too many digits. The alternatives are to + // use >128 bit values (slower) or do some complex finessing of + // bit counts by working with powers of 5 instead of 10. 
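// (For example, formatting the double 2.0 lands in this branch: the bulk step above
// yields roughly 2000000 for the first seven digits, but the interval is wide enough
// that everything after the leading "2" is redundant, so the loop below backs the
// extra digits out one at a time until only the single digit "2" remains.)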
+ +#if HAVE_UINT128_T + uint64_t uHigh = u >> 64; + uint64_t lHigh = l >> 64; + if (0 != (uint64_t)l) { + lHigh += 1; + } +#else + uint64_t uHigh = ((uint64_t)u.high << 32) + u.c; + uint64_t lHigh = ((uint64_t)l.high << 32) + l.c; + if (0 != (l.b | l.low)) { + lHigh += 1; + } +#endif + uint64_t tHigh; + if (isBoundary) { + tHigh = (uHigh + lHigh * 2) / 3; + } else { + tHigh = (uHigh + lHigh) / 2; + } + + uint32_t u0 = uHigh >> (64 - integerBits); + uint32_t l0 = lHigh >> (64 - integerBits); + if ((lHigh & ((1ULL << (64 - integerBits)) - 1)) != 0) { + l0 += 1; + } + uint32_t t0 = tHigh >> (64 - integerBits); + int t0digits = 8; + + uint32_t u1 = u0 / 10; + uint32_t l1 = (l0 + 9) / 10; + int trailingZeros = is128bitZero(t); + int droppedDigit = ((tHigh * 10) >> (64 - integerBits)) % 10; + while (u1 >= l1 && u1 != 0) { + u0 = u1; + l0 = l1; + trailingZeros &= droppedDigit == 0; + droppedDigit = t0 % 10; + t0 /= 10; + t0digits--; + u1 = u0 / 10; + l1 = (l0 + 9) / 10; + } + // Correct the final digit + if (droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros)) { + t0 += 1; + } else if (droppedDigit == 5 && trailingZeros) { + t0 += 1; + t0 &= ~1; + } + // t0 has t0digits digits. Write them out + if (p > dest + length - t0digits - 1) { // Make sure we have space + dest[0] = '\0'; + return 0; + } + int i = t0digits; + while (i > 1) { // Write out 2 digits at a time back-to-front + i -= 2; + memcpy(p + i, asciiDigitTable + (t0 % 100) * 2, 2); + t0 /= 100; + } + if (i > 0) { // Handle an odd number of digits + p[0] = t0 + '0'; + } + p += t0digits; // Move the pointer past the digits we just wrote + } else { + // + // Our initial scaling did not produce too many digits. + // The `d12345678` value holds the first 7 digits (plus + // a leading zero that will be useful later). We write + // those out and then incrementally generate as many + // more digits as necessary. The remainder of this + // algorithm is basically just Grisu2. + // + + if (p > dest + length - 9) { + dest[0] = '\0'; + return 0; + } + // Write out the 7 digits we got earlier + leading zero + int d1234 = d12345678 / 10000; + int d5678 = d12345678 % 10000; + int d78 = d5678 % 100; + int d56 = d5678 / 100; + memcpy(p + 6, asciiDigitTable + d78 * 2, 2); + memcpy(p + 4, asciiDigitTable + d56 * 2, 2); + int d34 = d1234 % 100; + int d12 = d1234 / 100; + memcpy(p + 2, asciiDigitTable + d34 * 2, 2); + memcpy(p, asciiDigitTable + d12 * 2, 2); + p += 8; + + // Seven digits wasn't enough, so let's get some more. + // Most binary64 values need >= 15 digits total. We already have seven, + // so try grabbing the next 8 digits all at once. + // (This is suboptimal for binary32, but the code savings + // from sharing this implementation are worth it.) 
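// (For example, the double nearest 1.0/3.0 gets its first seven digits (3333333) from
// the bulk step above, eight more from the step just below, and then the one-digit-at-a-time
// loop supplies the last digit or two of its 16-digit shortest form.)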
+ static const uint32_t bulkDigitFactor = 100000000; // 10^(15-bulkFirstDigits) + swift_uint128_t d0 = delta; + multiply128xu32(&d0, bulkDigitFactor); + swift_uint128_t t0 = t; + multiply128xu32(&t0, bulkDigitFactor); + int bulkDigits = extractIntegerPart128(&t0, integerBits); // 9 digits + if (isLessThan128x128(d0, t0)) { + if (p > dest + length - 9) { + dest[0] = '\0'; + return 0; + } + // Next 8 digits are good; add them to the output + int d1234 = bulkDigits / 10000; + int d5678 = bulkDigits % 10000; + int d78 = d5678 % 100; + int d56 = d5678 / 100; + memcpy(p + 6, asciiDigitTable + d78 * 2, 2); + memcpy(p + 4, asciiDigitTable + d56 * 2, 2); + int d34 = d1234 % 100; + int d12 = d1234 / 100; + memcpy(p + 2, asciiDigitTable + d34 * 2, 2); + memcpy(p, asciiDigitTable + d12 * 2, 2); + p += 8; + + t = t0; + delta = d0; + } + + // Finish up by generating and writing one digit at a time. + while (isLessThan128x128(delta, t)) { + if (p > dest + length - 2) { + dest[0] = '\0'; + return 0; + } + multiply128xu32(&delta, 10); + multiply128xu32(&t, 10); + *p++ = '0' + extractIntegerPart128(&t, integerBits); + } + + // Adjust the final digit to be closer to the original value. This accounts + // for the fact that sometimes there is more than one shortest digit + // sequence. + + // For example, consider how the above would work if you had the + // value 0.1234 and computed u = 0.1257, l = 0.1211. The above + // digit generation works with `u`, so produces 0.125. But the + // values 0.122, 0.123, and 0.124 are just as short and 0.123 is + // the best choice, since it's closest to the original value. + + // We know delta and t are both less than 10.0 here, so we can + // shed some excess integer bits to simplify the following: + const int adjustIntegerBits = 4; // Integer bits for "adjust" phase + shiftLeft128(&delta, integerBits - adjustIntegerBits); + shiftLeft128(&t, integerBits - adjustIntegerBits); + + // Note: We've already consumed most of our available precision, + // so it's okay to just work in 64 bits for this... + uint64_t deltaHigh64 = extractHigh64From128(delta); + uint64_t tHigh64 = extractHigh64From128(t); + + // If `delta < t + 1.0`, then the interval is narrower than + // one decimal digit, so there is no other option. + if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - adjustIntegerBits))) { + uint64_t skew; + if (isBoundary) { + // If we're at the boundary where the exponent shifts, + // then the original value is 1/3 of the way from + // the bottom of the interval ... + skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64; + } else { + // ... otherwise it's exactly in the middle. + skew = deltaHigh64 / 2 - tHigh64; + } + + // The `skew` above is the difference between our + // computed digits and the original exact value. + // Use that to offset the final digit: + uint64_t one = (uint64_t)(1) << (64 - adjustIntegerBits); + uint64_t fractionMask = one - 1; + uint64_t oneHalf = one >> 1; + if ((skew & fractionMask) == oneHalf) { + int adjust = (int)(skew >> (64 - adjustIntegerBits)); + // If the skew is exactly integer + 1/2, round the + // last digit even after adjustment + p[-1] -= adjust; + p[-1] &= ~1; + } else { + // Else round to nearest... 
+ int adjust = (int)((skew + oneHalf) >> (64 - adjustIntegerBits)); + p[-1] -= adjust; + } + } + } + + // Step 8: Shuffle digits into the final textual form + int forceExponential = binaryExponent > 54 || (binaryExponent == 54 && !isBoundary); + return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); +} +#endif + +// ================================================================ +// +// FLOAT80 +// +// ================================================================ + +#if SWIFT_DTOA_FLOAT80_SUPPORT +#if LONG_DOUBLE_IS_FLOAT80 +size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { + return swift_dtoa_optimal_float80_p(&d, dest, length); +} +#endif + +// Format an Intel x87 80-bit extended precision floating-point format +// This does not rely on the C environment for floating-point arithmetic +// or library support of any kind. +size_t swift_dtoa_optimal_float80_p(const void *d, char *dest, size_t length) +{ + static const int exponentBitCount = 15; + static const int exponentMask = (1 << exponentBitCount) - 1; + // See comments in swift_dtoa_optimal_binary64_p to understand + // why we use 16,382 instead of 16,383 here. + static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 + + // Step 0: Deconstruct the target number + // Note: this strongly assumes Intel 80-bit extended format in LSB + // byte order + const uint64_t *raw_p = (const uint64_t *)d; + int exponentBitPattern = raw_p[1] & exponentMask; + int negative = (raw_p[1] >> 15) & 1; + uint64_t significandBitPattern = raw_p[0]; + + // Step 1: Handle the various input cases: + int64_t binaryExponent; + uint64_t significand; + int isBoundary = (significandBitPattern & 0x7fffffffffffffff) == 0; + if (length < 1) { + return 0; + } else if (exponentBitPattern == exponentMask) { // NaN or Infinity + // Following 80387 semantics as documented in Wikipedia.org "Extended Precision" + // Also see Intel's "Floating Point Reference Sheet" + // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf + int selector = significandBitPattern >> 62; // Top 2 bits + uint64_t payload = significandBitPattern & (((uint64_t)1 << 62) - 1); // bottom 62 bits + switch (selector) { + case 0: // ∞ or snan on 287, invalid on 387 + case 1: // Pseudo-NaN: snan on 287, invalid on 387 + break; + case 2: + if (payload == 0) { // snan on 287, ∞ on 387 + return infinity(dest, length, negative); + } else { // snan on 287 and 387 + return nan_details(dest, length, negative, 0 /* quiet */, 0, payload); + } + break; + case 3: + // Zero payload and sign bit set is "indefinite" (treated as qNaN here), + // Otherwise qNan on 387, sNaN on 287 + return nan_details(dest, length, negative, 1 /* quiet */, 0, payload); + } + // Handle "invalid" patterns as plain "nan" + return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, payload); + } else if (exponentBitPattern == 0) { + if (significandBitPattern == 0) { // Zero + return zero(dest, length, negative); + } else { // subnormal + binaryExponent = 1 - exponentBias; + significand = significandBitPattern; + } + } else if (significandBitPattern >> 63) { // Normal + binaryExponent = exponentBitPattern - exponentBias; + significand = significandBitPattern; + } else { + // Invalid pattern rejected by 80387 and later. 
+ // Handle "invalid" patterns as plain "nan" + return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, 0); + } + + // Step 2: Determine the exact unscaled target interval + uint64_t halfUlp = (uint64_t)1 << 63; + uint64_t quarterUlp = halfUlp >> 1; + uint64_t threeQuarterUlp = halfUlp + quarterUlp; + swift_uint128_t upperMidpointExact, lowerMidpointExact; + initialize128WithHighLow64(upperMidpointExact, significand, halfUlp); + // Subtract 1/4 or 1/2 ULP by first subtracting 1 full ULP, then adding some back + initialize128WithHighLow64(lowerMidpointExact, significand - 1, isBoundary ? threeQuarterUlp : halfUlp); + + return _swift_dtoa_256bit_backend + ( + dest, + length, + upperMidpointExact, + lowerMidpointExact, + negative, + isBoundary, + (significandBitPattern & 1) != 0, + binaryExponent, + binaryExponent > 65 || (binaryExponent == 65 && !isBoundary) // forceExponential + ); + +} +#endif + +// ================================================================ +// +// BINARY128 +// +// ================================================================ + +#if SWIFT_DTOA_BINARY128_SUPPORT +#if LONG_DOUBLE_IS_BINARY128 +size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { + return swift_dtoa_optimal_binary128_p(&d, dest, length); +} +#endif + +// Format an IEEE 754 binary128 quad-precision floating-point number. +// This does not rely on the C environment for floating-point arithmetic +// or library support of any kind. +size_t swift_dtoa_optimal_binary128_p(const void *d, char *dest, size_t length) +{ + static const int exponentBitCount = 15; + static const int exponentMask = (1 << exponentBitCount) - 1; + // See comments in swift_dtoa_optimal_binary64_p to understand + // why we use 16,382 instead of 16,383 here. 
+ static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 + + // Step 0: Deconstruct the target number in IEEE 754 binary128 LSB format + const uint64_t *raw_p = (const uint64_t *)d; + int exponentBitPattern = (raw_p[1] >> 48) & exponentMask; + int negative = (raw_p[1] >> 63) & 1; + uint64_t significandHigh = raw_p[1] & 0xffffffffffffULL; + uint64_t significandLow = raw_p[0]; + + // Step 1: Handle the various input cases: + int64_t binaryExponent; + int isBoundary = (significandLow == 0) && (significandHigh == 0); + if (length < 1) { + return 0; + } else if (exponentBitPattern == exponentMask) { // NaN or Infinity + if (isBoundary) { // Infinity + return infinity(dest, length, negative); + } else { // NaN + int signaling = (significandHigh >> 47) & 1; + uint64_t payloadHigh = significandHigh & 0x3fffffffffffULL; + uint64_t payloadLow = significandLow; + return nan_details(dest, length, negative, signaling == 0, payloadHigh, payloadLow); + } + } else if (exponentBitPattern == 0) { + if (isBoundary) { // Zero + return zero(dest, length, negative); + } else { // subnormal + binaryExponent = 1 - exponentBias; + } + } else { // Normal + binaryExponent = exponentBitPattern - exponentBias; + significandHigh |= (1ULL << 48); + } + // Align significand to 0.113 fractional form + significandHigh <<= 15; + significandHigh |= significandLow >> (64 - 15); + significandLow <<= 15; + + // Step 2: Determine the exact unscaled target interval + uint64_t halfUlp = (uint64_t)1 << 14; + uint64_t quarterUlp = halfUlp >> 1; + swift_uint128_t upperMidpointExact, lowerMidpointExact; + initialize128WithHighLow64(upperMidpointExact, significandHigh, significandLow + halfUlp); + // Subtract 1/4 or 1/2 ULP + if (significandLow == 0) { + initialize128WithHighLow64(lowerMidpointExact, + significandHigh - 1, + significandLow - (isBoundary ? quarterUlp : halfUlp)); + } else { + initialize128WithHighLow64(lowerMidpointExact, + significandHigh, + significandLow - (isBoundary ? quarterUlp : halfUlp)); + } + + return _swift_dtoa_256bit_backend + ( + dest, + length, + upperMidpointExact, + lowerMidpointExact, + negative, + isBoundary, + (significandLow & 0x8000) != 0, + binaryExponent, + binaryExponent > 114 || (binaryExponent == 114 && !isBoundary) // forceExponential + ); +} +#endif + +// ================================================================ +// +// FLOAT80/BINARY128 common backend +// +// This uses 256-bit fixed-width arithmetic to efficiently compute the +// optimal form for a decomposed float80 or binary128 value. It is +// less heavily commented than the 128-bit version above; see that +// implementation for detailed explanation of the logic here. +// +// This sacrifices some performance for float80, which can be done +// more efficiently with 192-bit fixed-width arithmetic. But the code +// size savings from sharing this logic between float80 and binary128 +// are substantial, and the resulting float80 performance is still much +// better than most competing implementations. +// +// Also in the interest of code size savings, this eschews some of the +// optimizations used by the 128-bit backend above. Those +// optimizations are simple to reintroduce if you're interested in +// further performance improvements. +// +// If you are interested in extreme code size, you can also use this +// backend for binary32 and binary64, eliminating the separate 128-bit +// implementation. That variation offers surprisingly reasonable +// performance overall. 
+// +// ================================================================ + +#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +static size_t _swift_dtoa_256bit_backend +( + char *dest, + size_t length, + swift_uint128_t upperMidpointExact, + swift_uint128_t lowerMidpointExact, + int negative, + int isBoundary, + int isOddSignificand, + int binaryExponent, + bool forceExponential +) +{ + // Step 3: Estimate the base 10 exponent + int base10Exponent = decimalExponentFor2ToThe(binaryExponent); + + // Step 4: Compute a power-of-10 scale factor + swift_uint256_t powerOfTenRoundedDown; + swift_uint256_t powerOfTenRoundedUp; + int powerOfTenExponent = 0; + intervalContainingPowerOf10_Binary128(-base10Exponent, + &powerOfTenRoundedDown, + &powerOfTenRoundedUp, + &powerOfTenExponent); + const int extraBits = binaryExponent + powerOfTenExponent; + + // Step 5: Scale the interval (with rounding) + static const int integerBits = 14; // Enough for 4 decimal digits +#if HAVE_UINT128_T + static const int highFractionBits = 64 - integerBits; +#else + static const int highFractionBits = 32 - integerBits; +#endif + swift_uint256_t u, l; + if (isOddSignificand) { + // Narrow the interval (odd significand) + u = powerOfTenRoundedDown; + multiply256x128RoundingDown(&u, upperMidpointExact); + shiftRightRoundingDown256(&u, integerBits - extraBits); + + l = powerOfTenRoundedUp; + multiply256x128RoundingUp(&l, lowerMidpointExact); + shiftRightRoundingUp256(&l, integerBits - extraBits); + } else { + // Widen the interval (even significand) + u = powerOfTenRoundedUp; + multiply256x128RoundingUp(&u, upperMidpointExact); + shiftRightRoundingUp256(&u, integerBits - extraBits); + + l = powerOfTenRoundedDown; + multiply256x128RoundingDown(&l, lowerMidpointExact); + shiftRightRoundingDown256(&l, integerBits - extraBits); + } + + // Step 6: Align first digit, adjust exponent +#if HAVE_UINT128_T + while (u.high < (uint64_t)1 << highFractionBits) +#else + while (u.elt[7] < (uint64_t)1 << highFractionBits) +#endif + { + base10Exponent -= 1; + multiply256xu32(&l, 10); + multiply256xu32(&u, 10); + } + + swift_uint256_t t = u; + swift_uint256_t delta = u; + subtract256x256(&delta, l); + + // Step 7: Generate digits + char *p = dest; + if (p > dest + length - 4) { // Shortest output is "1.0" (4 bytes) + dest[0] = '\0'; + return 0; + } + if (negative) { + *p++ = '-'; + } + char * const firstOutputChar = p; + + // Adjustment above already set up the first digit + *p++ = '0'; + *p++ = '0' + extractIntegerPart256(&t, integerBits); + + // Generate 4 digits at a time + swift_uint256_t d0 = delta; + multiply256xu32(&d0, 10000); + swift_uint256_t t0 = t; + multiply256xu32(&t0, 10000); + int d1234 = extractIntegerPart256(&t0, integerBits); + while (isLessThan256x256(d0, t0)) { + if (p > dest + length - 5) { + dest[0] = '\0'; + return 0; + } + int d34 = d1234 % 100; + int d12 = d1234 / 100; + memcpy(p + 2, asciiDigitTable + d34 * 2, 2); + memcpy(p, asciiDigitTable + d12 * 2, 2); + p += 4; + t = t0; + delta = d0; + multiply256xu32(&d0, 10000); + multiply256xu32(&t0, 10000); + d1234 = extractIntegerPart256(&t0, integerBits); + } + + // Generate one digit at a time... 
+ while (isLessThan256x256(delta, t)) { + if (p > dest + length - 2) { + dest[0] = '\0'; + return 0; + } + multiply256xu32(&delta, 10); + multiply256xu32(&t, 10); + *p++ = extractIntegerPart256(&t, integerBits) + '0'; + } + + // Adjust the final digit to be closer to the original value + // We've already consumed most of our available precision, and only + // need a couple of integer bits, so we can narrow down to + // 64 bits here. +#if HAVE_UINT128_T + uint64_t deltaHigh64 = delta.high; + uint64_t tHigh64 = t.high; +#else + uint64_t deltaHigh64 = ((uint64_t)delta.elt[7] << 32) + delta.elt[6]; + uint64_t tHigh64 = ((uint64_t)t.elt[7] << 32) + t.elt[6]; +#endif + if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - integerBits))) { + uint64_t skew; + if (isBoundary) { + skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64; + } else { + skew = deltaHigh64 / 2 - tHigh64; + } + uint64_t one = (uint64_t)(1) << (64 - integerBits); + uint64_t fractionMask = one - 1; + uint64_t oneHalf = one >> 1; + if ((skew & fractionMask) == oneHalf) { + int adjust = (int)(skew >> (64 - integerBits)); + // If the skew is integer + 1/2, round the last digit even + // after adjustment + p[-1] -= adjust; + p[-1] &= ~1; + } else { + // Else round to nearest... + int adjust = (int)((skew + oneHalf) >> (64 - integerBits)); + p[-1] -= adjust; + } + } + + return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); +} +#endif + +#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +static int finishFormatting(char *dest, size_t length, + char *p, + char *firstOutputChar, + int forceExponential, + int base10Exponent) +{ + int digitCount = p - firstOutputChar - 1; + if (base10Exponent < -4 || forceExponential) { + // Exponential form: convert "0123456" => "1.23456e78" + firstOutputChar[0] = firstOutputChar[1]; + if (digitCount > 1) { + firstOutputChar[1] = '.'; + } else { + p--; + } + // Add exponent at the end + if (p > dest + length - 5) { + dest[0] = '\0'; + return 0; + } + *p++ = 'e'; + if (base10Exponent < 0) { + *p++ = '-'; + base10Exponent = -base10Exponent; + } else { + *p++ = '+'; + } + if (base10Exponent > 99) { + if (base10Exponent > 999) { + if (p > dest + length - 5) { + dest[0] = '\0'; + return 0; + } + memcpy(p, asciiDigitTable + (base10Exponent / 100) * 2, 2); + p += 2; + } else { + if (p > dest + length - 4) { + dest[0] = '\0'; + return 0; + } + *p++ = (base10Exponent / 100) + '0'; + } + base10Exponent %= 100; + } + memcpy(p, asciiDigitTable + base10Exponent * 2, 2); + p += 2; + } else if (base10Exponent < 0) { // "0123456" => "0.00123456" + // Slide digits back in buffer and prepend zeros and a period + if (p > dest + length + base10Exponent - 1) { + dest[0] = '\0'; + return 0; + } + memmove(firstOutputChar - base10Exponent, firstOutputChar, p - firstOutputChar); + memset(firstOutputChar, '0', -base10Exponent); + firstOutputChar[1] = '.'; + p += -base10Exponent; + } else if (base10Exponent + 1 < digitCount) { // "0123456" => "123.456" + // Slide integer digits forward and insert a '.' 
+ memmove(firstOutputChar, firstOutputChar + 1, base10Exponent + 1); + firstOutputChar[base10Exponent + 1] = '.'; + } else { // "0123456" => "12345600.0" + // Slide digits forward 1 and append suitable zeros and '.0' + if (p + base10Exponent - digitCount > dest + length - 3) { + dest[0] = '\0'; + return 0; + } + memmove(firstOutputChar, firstOutputChar + 1, p - firstOutputChar - 1); + p -= 1; + memset(p, '0', base10Exponent - digitCount + 1); + p += base10Exponent - digitCount + 1; + } + *p = '\0'; + return p - dest; +} +#endif + +// ================================================================ +// +// Arithmetic helpers +// +// ================================================================ + +// The core algorithm relies heavily on fixed-point arithmetic with +// 128-bit and 256-bit integer values. (For binary32/64 and +// float80/binary128, respectively.) They also need precise control +// over all rounding. +// +// Note that most arithmetic operations are the same for integers and +// fractions, so we can just use the normal integer operations in most +// places. Multiplication however, is different for fixed-size +// fractions. Integer multiplication preserves the low-order part and +// discards the high-order part (ignoring overflow). Fraction +// multiplication preserves the high-order part and discards the +// low-order part (rounding). So most of the arithmetic helpers here +// are for multiplication. + +// Note: With 64-bit GCC and Clang, we get a noticable performance +// gain by using `__uint128_t`. Otherwise, we have to break things +// down into 32-bit chunks so we don't overflow 64-bit temporaries. + +#if SWIFT_DTOA_BINARY64_SUPPORT +// Multiply a 128-bit fraction by a 64-bit fraction, rounding down. +static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs) { +#if HAVE_UINT128_T + uint64_t lhsl = (uint64_t)lhs; + uint64_t lhsh = (uint64_t)(lhs >> 64); + swift_uint128_t h = (swift_uint128_t)lhsh * rhs; + swift_uint128_t l = (swift_uint128_t)lhsl * rhs; + return h + (l >> 64); +#else + swift_uint128_t result; + static const uint64_t mask32 = UINT32_MAX; + uint64_t rhs0 = rhs & mask32; + uint64_t rhs1 = rhs >> 32; + uint64_t t = (lhs.low) * rhs0; + t >>= 32; + uint64_t a = (lhs.b) * rhs0; + uint64_t b = (lhs.low) * rhs1; + t += a + (b & mask32); + t >>= 32; + t += (b >> 32); + a = lhs.c * rhs0; + b = lhs.b * rhs1; + t += (a & mask32) + (b & mask32); + result.low = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + a = lhs.high * rhs0; + b = lhs.c * rhs1; + t += (a & mask32) + (b & mask32); + result.b = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + t += lhs.high * rhs1; + result.c = t; + result.high = t >> 32; + return result; +#endif +} + +// Multiply a 128-bit fraction by a 64-bit fraction, rounding up. 
+static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs) { +#if HAVE_UINT128_T + uint64_t lhsl = (uint64_t)lhs; + uint64_t lhsh = (uint64_t)(lhs >> 64); + swift_uint128_t h = (swift_uint128_t)lhsh * rhs; + swift_uint128_t l = (swift_uint128_t)lhsl * rhs; + const static __uint128_t bias = ((__uint128_t)1 << 64) - 1; + return h + ((l + bias) >> 64); +#else + swift_uint128_t result; + static const uint64_t mask32 = UINT32_MAX; + uint64_t rhs0 = rhs & mask32; + uint64_t rhs1 = rhs >> 32; + uint64_t t = (lhs.low) * rhs0 + mask32; + t >>= 32; + uint64_t a = (lhs.b) * rhs0; + uint64_t b = (lhs.low) * rhs1; + t += (a & mask32) + (b & mask32) + mask32; + t >>= 32; + t += (a >> 32) + (b >> 32); + a = lhs.c * rhs0; + b = lhs.b * rhs1; + t += (a & mask32) + (b & mask32); + result.low = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + a = lhs.high * rhs0; + b = lhs.c * rhs1; + t += (a & mask32) + (b & mask32); + result.b = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + t += lhs.high * rhs1; + result.c = t; + result.high = t >> 32; + return result; +#endif +} + +#if !HAVE_UINT128_T +// Multiply a 128-bit fraction by a 32-bit integer in a 32-bit environment. +// (On 64-bit, we use a fast inline macro.) +static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs) { + uint64_t t = (uint64_t)(lhs->low) * rhs; + lhs->low = (uint32_t)t; + t = (t >> 32) + (uint64_t)(lhs->b) * rhs; + lhs->b = (uint32_t)t; + t = (t >> 32) + (uint64_t)(lhs->c) * rhs; + lhs->c = (uint32_t)t; + t = (t >> 32) + (uint64_t)(lhs->high) * rhs; + lhs->high = (uint32_t)t; +} + +// Compare two 128-bit integers in a 32-bit environment +// (On 64-bit, we use a fast inline macro.) +static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs) { + return ((lhs.high < rhs.high) + || ((lhs.high == rhs.high) + && ((lhs.c < rhs.c) + || ((lhs.c == rhs.c) + && ((lhs.b < rhs.b) + || ((lhs.b == rhs.b) + && (lhs.low < rhs.low))))))); +} + +// Subtract 128-bit values in a 32-bit environment +static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs) { + uint64_t t = (uint64_t)lhs->low + (~rhs.low) + 1; + lhs->low = (uint32_t)t; + t = (t >> 32) + lhs->b + (~rhs.b); + lhs->b = (uint32_t)t; + t = (t >> 32) + lhs->c + (~rhs.c); + lhs->c = (uint32_t)t; + t = (t >> 32) + lhs->high + (~rhs.high); + lhs->high = (uint32_t)t; +} +#endif + +#if !HAVE_UINT128_T +// Shift a 128-bit integer right, rounding down. +static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift) { + // Note: Shift is always less than 32 + swift_uint128_t result; + uint64_t t = (uint64_t)lhs.low >> shift; + t += ((uint64_t)lhs.b << (32 - shift)); + result.low = t; + t >>= 32; + t += ((uint64_t)lhs.c << (32 - shift)); + result.b = t; + t >>= 32; + t += ((uint64_t)lhs.high << (32 - shift)); + result.c = t; + t >>= 32; + result.high = t; + return result; +} +#endif + +#if !HAVE_UINT128_T +// Shift a 128-bit integer right, rounding up. 
+static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift) { + swift_uint128_t result; + const uint64_t bias = (1 << shift) - 1; + uint64_t t = ((uint64_t)lhs.low + bias) >> shift; + t += ((uint64_t)lhs.b << (32 - shift)); + result.low = t; + t >>= 32; + t += ((uint64_t)lhs.c << (32 - shift)); + result.b = t; + t >>= 32; + t += ((uint64_t)lhs.high << (32 - shift)); + result.c = t; + t >>= 32; + result.high = t; + return result; +} +#endif +#endif + + // Shift a 128-bit integer left, discarding high bits +#if (SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT) && !HAVE_UINT128_T +static void shiftLeft128(swift_uint128_t *lhs, int shift) { + // Note: Shift is always less than 32 + uint64_t t = (uint64_t)lhs->high << (shift + 32); + t += (uint64_t)lhs->c << shift; + lhs->high = t >> 32; + t <<= 32; + t += (uint64_t)lhs->b << shift; + lhs->c = t >> 32; + t <<= 32; + t += (uint64_t)lhs->low << shift; + lhs->b = t >> 32; + lhs->low = t; +} +#endif + +#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +// Multiply a 256-bit fraction by a 32-bit integer. +// This is used in the digit generation to multiply by ten or +// 10,000. Note that rounding is never an issue. +// As used above, this will never overflow. +static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs) { +#if HAVE_UINT128_T + __uint128_t t = (__uint128_t)lhs->low * rhs; + lhs->low = (uint64_t)t; + t = (t >> 64) + (__uint128_t)lhs->midlow * rhs; + lhs->midlow = (uint64_t)t; + t = (t >> 64) + (__uint128_t)lhs->midhigh * rhs; + lhs->midhigh = (uint64_t)t; + t = (t >> 64) + (__uint128_t)lhs->high * rhs; + lhs->high = (uint64_t)t; +#else + uint64_t t = 0; + for (int i = 0; i < 8; ++i) { + t = (t >> 32) + (uint64_t)lhs->elt[i] * rhs; + lhs->elt[i] = t; + } +#endif +} + +// Multiply a 256-bit fraction by a 128-bit fraction, rounding down. +static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs) { +#if HAVE_UINT128_T + // A full multiply of four 64-bit values by two 64-bit values + // yields six such components. We discard the bottom two (except + // for carries) to get a rounded-down four-element result. + __uint128_t current = (__uint128_t)lhs->low * (uint64_t)rhs; + + current = (current >> 64); + __uint128_t t = (__uint128_t)lhs->low * (rhs >> 64); + current += (uint64_t)t; + __uint128_t next = t >> 64; + t = (__uint128_t)lhs->midlow * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + + current = next + (current >> 64); + t = (__uint128_t)lhs->midlow * (rhs >> 64); + current += (uint64_t)t; + next = t >> 64; + t = (__uint128_t)lhs->midhigh * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + lhs->low = (uint64_t)current; + + current = next + (current >> 64); + t = (__uint128_t)lhs->midhigh * (rhs >> 64); + current += (uint64_t)t; + next = t >> 64; + t = (__uint128_t)lhs->high * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + lhs->midlow = (uint64_t)current; + + current = next + (current >> 64); + t = (__uint128_t)lhs->high * (rhs >> 64); + current += t; + lhs->midhigh = (uint64_t)current; + lhs->high = (uint64_t)(current >> 64); +#else + uint64_t a, b, c, d; // temporaries + // Eight 32-bit values multiplied by 4 32-bit values. Oh my. 
+ static const uint64_t mask32 = UINT32_MAX; + uint64_t t = 0; + + a = (uint64_t)lhs->elt[0] * rhs.low; + t += (a & mask32); + t >>= 32; + t += (a >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.b; + b = (uint64_t)lhs->elt[1] * rhs.low; + t += (a & mask32) + (b & mask32); + t >>= 32; + t += (a >> 32) + (b >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.c; + b = (uint64_t)lhs->elt[1] * rhs.b; + c = (uint64_t)lhs->elt[2] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32); + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.high; + b = (uint64_t)lhs->elt[1] * rhs.c; + c = (uint64_t)lhs->elt[2] * rhs.b; + d = (uint64_t)lhs->elt[3] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); + + for (int i = 0; i < 4; ++i) { + a = (uint64_t)lhs->elt[i + 1] * rhs.high; + b = (uint64_t)lhs->elt[i + 2] * rhs.c; + c = (uint64_t)lhs->elt[i + 3] * rhs.b; + d = (uint64_t)lhs->elt[i + 4] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); + lhs->elt[i] = t; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); + } + + a = (uint64_t)lhs->elt[5] * rhs.high; + b = (uint64_t)lhs->elt[6] * rhs.c; + c = (uint64_t)lhs->elt[7] * rhs.b; + t += (a & mask32) + (b & mask32) + (c & mask32); + lhs->elt[4] = t; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32); + + a = (uint64_t)lhs->elt[6] * rhs.high; + b = (uint64_t)lhs->elt[7] * rhs.c; + t += (a & mask32) + (b & mask32); + lhs->elt[5] = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + + t += (uint64_t)lhs->elt[7] * rhs.high; + lhs->elt[6] = t; + lhs->elt[7] = t >> 32; +#endif +} + +// Multiply a 256-bit fraction by a 128-bit fraction, rounding up. +static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs) { +#if HAVE_UINT128_T + // Same as the rounding-down version, but we add + // UINT128_MAX to the bottom two to force an extra + // carry if they are non-zero. + swift_uint128_t current = (swift_uint128_t)lhs->low * (uint64_t)rhs; + current += UINT64_MAX; + + current = (current >> 64); + swift_uint128_t t = (swift_uint128_t)lhs->low * (rhs >> 64); + current += (uint64_t)t; + swift_uint128_t next = t >> 64; + t = (swift_uint128_t)lhs->midlow * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + // Round up by adding UINT128_MAX (upper half) + current += UINT64_MAX; + + current = next + (current >> 64); + t = (swift_uint128_t)lhs->midlow * (rhs >> 64); + current += (uint64_t)t; + next = t >> 64; + t = (swift_uint128_t)lhs->midhigh * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + lhs->low = (uint64_t)current; + + current = next + (current >> 64); + t = (swift_uint128_t)lhs->midhigh * (rhs >> 64); + current += (uint64_t)t; + next = t >> 64; + t = (swift_uint128_t)lhs->high * (uint64_t)rhs; + current += (uint64_t)t; + next += t >> 64; + lhs->midlow = (uint64_t)current; + + current = next + (current >> 64); + t = (swift_uint128_t)lhs->high * (rhs >> 64); + current += t; + lhs->midhigh = (uint64_t)current; + lhs->high = (uint64_t)(current >> 64); +#else + uint64_t a, b, c, d; // temporaries + // Eight 32-bit values multiplied by 4 32-bit values. Oh my. 
+ static const uint64_t mask32 = UINT32_MAX; + uint64_t t = 0; + + a = (uint64_t)lhs->elt[0] * rhs.low + mask32; + t += (a & mask32); + t >>= 32; + t += (a >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.b; + b = (uint64_t)lhs->elt[1] * rhs.low; + t += (a & mask32) + (b & mask32) + mask32; + t >>= 32; + t += (a >> 32) + (b >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.c; + b = (uint64_t)lhs->elt[1] * rhs.b; + c = (uint64_t)lhs->elt[2] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32) + mask32; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32); + + a = (uint64_t)lhs->elt[0] * rhs.high; + b = (uint64_t)lhs->elt[1] * rhs.c; + c = (uint64_t)lhs->elt[2] * rhs.b; + d = (uint64_t)lhs->elt[3] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32) + mask32; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); + + for (int i = 0; i < 4; ++i) { + a = (uint64_t)lhs->elt[i + 1] * rhs.high; + b = (uint64_t)lhs->elt[i + 2] * rhs.c; + c = (uint64_t)lhs->elt[i + 3] * rhs.b; + d = (uint64_t)lhs->elt[i + 4] * rhs.low; + t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32); + lhs->elt[i] = t; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32); + } + + a = (uint64_t)lhs->elt[5] * rhs.high; + b = (uint64_t)lhs->elt[6] * rhs.c; + c = (uint64_t)lhs->elt[7] * rhs.b; + t += (a & mask32) + (b & mask32) + (c & mask32); + lhs->elt[4] = t; + t >>= 32; + t += (a >> 32) + (b >> 32) + (c >> 32); + + a = (uint64_t)lhs->elt[6] * rhs.high; + b = (uint64_t)lhs->elt[7] * rhs.c; + t += (a & mask32) + (b & mask32); + lhs->elt[5] = t; + t >>= 32; + t += (a >> 32) + (b >> 32); + + t += (uint64_t)lhs->elt[7] * rhs.high; + lhs->elt[6] = t; + lhs->elt[7] = t >> 32; + +#endif +} + +// Subtract two 256-bit integers or fractions. +static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs) { +#if HAVE_UINT128_T + swift_uint128_t t = (swift_uint128_t)lhs->low + (~rhs.low) + 1; + lhs->low = t; + t = (t >> 64) + lhs->midlow + (~rhs.midlow); + lhs->midlow = t; + t = (t >> 64) + lhs->midhigh + (~rhs.midhigh); + lhs->midhigh = t; + lhs->high += (t >> 64) + (~rhs.high); +#else + uint64_t t = ((uint64_t)1) << 32; + for (int i = 0; i < 8; i++) { + t = (t >> 32) + lhs->elt[i] + (~rhs.elt[i]); + lhs->elt[i] = t; + } +#endif +} + +// Compare two 256-bit integers or fractions. +static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs) { +#if HAVE_UINT128_T + return (lhs.high < rhs.high) + || (lhs.high == rhs.high + && (lhs.midhigh < rhs.midhigh + || (lhs.midhigh == rhs.midhigh + && (lhs.midlow < rhs.midlow + || (lhs.midlow == rhs.midlow + && lhs.low < rhs.low))))); +#else + for (int i = 7; i >= 0; i--) { + if (lhs.elt[i] < rhs.elt[i]) { + return true; + } else if (lhs.elt[i] > rhs.elt[i]) { + return false; + } + } + return false; +#endif +} + +// Shift a 256-bit integer right (by less than 32 bits!), rounding down. +static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift) { +#if HAVE_UINT128_T + __uint128_t t = (__uint128_t)lhs->low >> shift; + t += ((__uint128_t)lhs->midlow << (64 - shift)); + lhs->low = t; + t >>= 64; + t += ((__uint128_t)lhs->midhigh << (64 - shift)); + lhs->midlow = t; + t >>= 64; + t += ((__uint128_t)lhs->high << (64 - shift)); + lhs->midhigh = t; + t >>= 64; + lhs->high = t; +#else + uint64_t t = (uint64_t)lhs->elt[0] >> shift; + for (int i = 0; i < 7; ++i) { + t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); + lhs->elt[i] = t; + t >>= 32; + } + lhs->elt[7] = t; +#endif +} + +// Shift a 256-bit integer right, rounding up. 
+// Note: The shift will always be less than 20. Someday, that +// might suggest a way to further optimize this. +static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift) { +#if HAVE_UINT128_T + const uint64_t bias = (1 << shift) - 1; + __uint128_t t = ((__uint128_t)lhs->low + bias) >> shift; + t += ((__uint128_t)lhs->midlow << (64 - shift)); + lhs->low = t; + t >>= 64; + t += ((__uint128_t)lhs->midhigh << (64 - shift)); + lhs->midlow = t; + t >>= 64; + t += ((__uint128_t)lhs->high << (64 - shift)); + lhs->midhigh = t; + t >>= 64; + lhs->high = t; +#else + const uint64_t bias = (1 << shift) - 1; + uint64_t t = ((uint64_t)lhs->elt[0] + bias) >> shift; + for (int i = 0; i < 7; ++i) { + t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); + lhs->elt[i] = t; + t >>= 32; + } + lhs->elt[7] = t; +#endif +} +#endif + +// ================================================================ +// +// Power of 10 calculation +// +// ================================================================ + +// +// ------------ Power-of-10 tables. -------------------------- +// +// Grisu-style algorithms rely on being able to rapidly +// find a high-precision approximation of any power of 10. +// These values were computed by a simple script that +// relied on Python's excellent variable-length +// integer support. + +#if SWIFT_DTOA_BINARY32_SUPPORT +// Table with negative powers of 10 to 64 bits +// +// Table size: 320 bytes +static uint64_t powersOf10_negativeBinary32[] = { + 0x8b61313bbabce2c6ULL, // x 2^-132 ~= 10^-40 + 0xae397d8aa96c1b77ULL, // x 2^-129 ~= 10^-39 + 0xd9c7dced53c72255ULL, // x 2^-126 ~= 10^-38 + 0x881cea14545c7575ULL, // x 2^-122 ~= 10^-37 + 0xaa242499697392d2ULL, // x 2^-119 ~= 10^-36 + 0xd4ad2dbfc3d07787ULL, // x 2^-116 ~= 10^-35 + 0x84ec3c97da624ab4ULL, // x 2^-112 ~= 10^-34 + 0xa6274bbdd0fadd61ULL, // x 2^-109 ~= 10^-33 + 0xcfb11ead453994baULL, // x 2^-106 ~= 10^-32 + 0x81ceb32c4b43fcf4ULL, // x 2^-102 ~= 10^-31 + 0xa2425ff75e14fc31ULL, // x 2^-99 ~= 10^-30 + 0xcad2f7f5359a3b3eULL, // x 2^-96 ~= 10^-29 + 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28 + 0x9e74d1b791e07e48ULL, // x 2^-89 ~= 10^-27 + 0xc612062576589ddaULL, // x 2^-86 ~= 10^-26 + 0xf79687aed3eec551ULL, // x 2^-83 ~= 10^-25 + 0x9abe14cd44753b52ULL, // x 2^-79 ~= 10^-24 + 0xc16d9a0095928a27ULL, // x 2^-76 ~= 10^-23 + 0xf1c90080baf72cb1ULL, // x 2^-73 ~= 10^-22 + 0x971da05074da7beeULL, // x 2^-69 ~= 10^-21 + 0xbce5086492111aeaULL, // x 2^-66 ~= 10^-20 + 0xec1e4a7db69561a5ULL, // x 2^-63 ~= 10^-19 + 0x9392ee8e921d5d07ULL, // x 2^-59 ~= 10^-18 + 0xb877aa3236a4b449ULL, // x 2^-56 ~= 10^-17 + 0xe69594bec44de15bULL, // x 2^-53 ~= 10^-16 + 0x901d7cf73ab0acd9ULL, // x 2^-49 ~= 10^-15 + 0xb424dc35095cd80fULL, // x 2^-46 ~= 10^-14 + 0xe12e13424bb40e13ULL, // x 2^-43 ~= 10^-13 + 0x8cbccc096f5088cbULL, // x 2^-39 ~= 10^-12 + 0xafebff0bcb24aafeULL, // x 2^-36 ~= 10^-11 + 0xdbe6fecebdedd5beULL, // x 2^-33 ~= 10^-10 + 0x89705f4136b4a597ULL, // x 2^-29 ~= 10^-9 + 0xabcc77118461cefcULL, // x 2^-26 ~= 10^-8 + 0xd6bf94d5e57a42bcULL, // x 2^-23 ~= 10^-7 + 0x8637bd05af6c69b5ULL, // x 2^-19 ~= 10^-6 + 0xa7c5ac471b478423ULL, // x 2^-16 ~= 10^-5 + 0xd1b71758e219652bULL, // x 2^-13 ~= 10^-4 + 0x83126e978d4fdf3bULL, // x 2^-9 ~= 10^-3 + 0xa3d70a3d70a3d70aULL, // x 2^-6 ~= 10^-2 + 0xccccccccccccccccULL, // x 2^-3 ~= 10^-1 +}; +#endif + +#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +// Tables with powers of 10 +// +// The constant powers of 10 here represent pure fractions 
+// with a binary point at the far left. (Each number in +// this first table is implicitly divided by 2^128.) +// +// Table size: 896 bytes +// +// A 64-bit significand allows us to exactly represent powers of 10 up +// to 10^27. In 128 bits, we can exactly represent powers of 10 up to +// 10^55. As with all of these tables, the binary exponent is not stored; +// it is computed by the `binaryExponentFor10ToThe(p)` function. +static const uint64_t powersOf10_Exact128[56 * 2] = { + // Low order ... high order + 0x0000000000000000ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly + 0x0000000000000000ULL, 0xa000000000000000ULL, // x 2^4 == 10^1 exactly + 0x0000000000000000ULL, 0xc800000000000000ULL, // x 2^7 == 10^2 exactly + 0x0000000000000000ULL, 0xfa00000000000000ULL, // x 2^10 == 10^3 exactly + 0x0000000000000000ULL, 0x9c40000000000000ULL, // x 2^14 == 10^4 exactly + 0x0000000000000000ULL, 0xc350000000000000ULL, // x 2^17 == 10^5 exactly + 0x0000000000000000ULL, 0xf424000000000000ULL, // x 2^20 == 10^6 exactly + 0x0000000000000000ULL, 0x9896800000000000ULL, // x 2^24 == 10^7 exactly + 0x0000000000000000ULL, 0xbebc200000000000ULL, // x 2^27 == 10^8 exactly + 0x0000000000000000ULL, 0xee6b280000000000ULL, // x 2^30 == 10^9 exactly + 0x0000000000000000ULL, 0x9502f90000000000ULL, // x 2^34 == 10^10 exactly + 0x0000000000000000ULL, 0xba43b74000000000ULL, // x 2^37 == 10^11 exactly + 0x0000000000000000ULL, 0xe8d4a51000000000ULL, // x 2^40 == 10^12 exactly + 0x0000000000000000ULL, 0x9184e72a00000000ULL, // x 2^44 == 10^13 exactly + 0x0000000000000000ULL, 0xb5e620f480000000ULL, // x 2^47 == 10^14 exactly + 0x0000000000000000ULL, 0xe35fa931a0000000ULL, // x 2^50 == 10^15 exactly + 0x0000000000000000ULL, 0x8e1bc9bf04000000ULL, // x 2^54 == 10^16 exactly + 0x0000000000000000ULL, 0xb1a2bc2ec5000000ULL, // x 2^57 == 10^17 exactly + 0x0000000000000000ULL, 0xde0b6b3a76400000ULL, // x 2^60 == 10^18 exactly + 0x0000000000000000ULL, 0x8ac7230489e80000ULL, // x 2^64 == 10^19 exactly + 0x0000000000000000ULL, 0xad78ebc5ac620000ULL, // x 2^67 == 10^20 exactly + 0x0000000000000000ULL, 0xd8d726b7177a8000ULL, // x 2^70 == 10^21 exactly + 0x0000000000000000ULL, 0x878678326eac9000ULL, // x 2^74 == 10^22 exactly + 0x0000000000000000ULL, 0xa968163f0a57b400ULL, // x 2^77 == 10^23 exactly + 0x0000000000000000ULL, 0xd3c21bcecceda100ULL, // x 2^80 == 10^24 exactly + 0x0000000000000000ULL, 0x84595161401484a0ULL, // x 2^84 == 10^25 exactly + 0x0000000000000000ULL, 0xa56fa5b99019a5c8ULL, // x 2^87 == 10^26 exactly + 0x0000000000000000ULL, 0xcecb8f27f4200f3aULL, // x 2^90 == 10^27 exactly + 0x4000000000000000ULL, 0x813f3978f8940984ULL, // x 2^94 == 10^28 exactly + 0x5000000000000000ULL, 0xa18f07d736b90be5ULL, // x 2^97 == 10^29 exactly + 0xa400000000000000ULL, 0xc9f2c9cd04674edeULL, // x 2^100 == 10^30 exactly + 0x4d00000000000000ULL, 0xfc6f7c4045812296ULL, // x 2^103 == 10^31 exactly + 0xf020000000000000ULL, 0x9dc5ada82b70b59dULL, // x 2^107 == 10^32 exactly + 0x6c28000000000000ULL, 0xc5371912364ce305ULL, // x 2^110 == 10^33 exactly + 0xc732000000000000ULL, 0xf684df56c3e01bc6ULL, // x 2^113 == 10^34 exactly + 0x3c7f400000000000ULL, 0x9a130b963a6c115cULL, // x 2^117 == 10^35 exactly + 0x4b9f100000000000ULL, 0xc097ce7bc90715b3ULL, // x 2^120 == 10^36 exactly + 0x1e86d40000000000ULL, 0xf0bdc21abb48db20ULL, // x 2^123 == 10^37 exactly + 0x1314448000000000ULL, 0x96769950b50d88f4ULL, // x 2^127 == 10^38 exactly + 0x17d955a000000000ULL, 0xbc143fa4e250eb31ULL, // x 2^130 == 10^39 exactly + 0x5dcfab0800000000ULL, 
0xeb194f8e1ae525fdULL, // x 2^133 == 10^40 exactly + 0x5aa1cae500000000ULL, 0x92efd1b8d0cf37beULL, // x 2^137 == 10^41 exactly + 0xf14a3d9e40000000ULL, 0xb7abc627050305adULL, // x 2^140 == 10^42 exactly + 0x6d9ccd05d0000000ULL, 0xe596b7b0c643c719ULL, // x 2^143 == 10^43 exactly + 0xe4820023a2000000ULL, 0x8f7e32ce7bea5c6fULL, // x 2^147 == 10^44 exactly + 0xdda2802c8a800000ULL, 0xb35dbf821ae4f38bULL, // x 2^150 == 10^45 exactly + 0xd50b2037ad200000ULL, 0xe0352f62a19e306eULL, // x 2^153 == 10^46 exactly + 0x4526f422cc340000ULL, 0x8c213d9da502de45ULL, // x 2^157 == 10^47 exactly + 0x9670b12b7f410000ULL, 0xaf298d050e4395d6ULL, // x 2^160 == 10^48 exactly + 0x3c0cdd765f114000ULL, 0xdaf3f04651d47b4cULL, // x 2^163 == 10^49 exactly + 0xa5880a69fb6ac800ULL, 0x88d8762bf324cd0fULL, // x 2^167 == 10^50 exactly + 0x8eea0d047a457a00ULL, 0xab0e93b6efee0053ULL, // x 2^170 == 10^51 exactly + 0x72a4904598d6d880ULL, 0xd5d238a4abe98068ULL, // x 2^173 == 10^52 exactly + 0x47a6da2b7f864750ULL, 0x85a36366eb71f041ULL, // x 2^177 == 10^53 exactly + 0x999090b65f67d924ULL, 0xa70c3c40a64e6c51ULL, // x 2^180 == 10^54 exactly + 0xfff4b4e3f741cf6dULL, 0xd0cf4b50cfe20765ULL, // x 2^183 == 10^55 exactly +}; +#endif + +#if SWIFT_DTOA_BINARY64_SUPPORT +// Rounded values supporting the full range of binary64 +// +// Table size: 464 bytes +// +// We only store every 28th power of ten here. +// We can multiply by an exact 64-bit power of +// ten from the table above to reconstruct the +// significand for any power of 10. +static const uint64_t powersOf10_Binary64[] = { + // low-order half, high-order half + 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 + 0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372 + 0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344 + 0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316 + 0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288 + 0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260 + 0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232 + 0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204 + 0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176 + 0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148 + 0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120 + 0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92 + 0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64 + 0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36 + 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 + 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly + 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly + 0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76 + 0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104 + 0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132 + 0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160 + 0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 10^188 + 0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216 + 0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244 + 0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272 + 0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300 + 0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328 + 0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 + 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 +}; +#endif + +#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +// Every 
56th power of 10 across the range of Float80/Binary128 +// +// Table size: 5,728 bytes +// +// Note: We could cut this in half at the cost of one additional +// 256-bit multiply by only storing the positive values and +// multiplying by 10^-4984 to obtain the negative ones. +static const uint64_t powersOf10_Binary128[] = { + // Low-order ... high-order + 0xaec2e6aff96b46aeULL, 0xf91044c2eff84750ULL, 0x2b55c9e70e00c557ULL, 0xb6536903bf8f2bdaULL, // x 2^-16556 ~= 10^-4984 + 0xda1b3c3dd3889587ULL, 0x73a7380aba84a6b1ULL, 0xbddb2dfde3f8a6e3ULL, 0xb9e5428330737362ULL, // x 2^-16370 ~= 10^-4928 + 0xa2d23c57cfebb9ecULL, 0x9f165c039ead6d77ULL, 0x88227fdfc13ab53dULL, 0xbd89006346a9a34dULL, // x 2^-16184 ~= 10^-4872 + 0x333d510cf27e5a5ULL, 0x4e3cc383eaa17b7bULL, 0xe05fe4207ca3d508ULL, 0xc13efc51ade7df64ULL, // x 2^-15998 ~= 10^-4816 + 0xff242c569bc1f539ULL, 0x5c67ba58680c4cceULL, 0x3c55f3f947fef0e9ULL, 0xc50791bd8dd72edbULL, // x 2^-15812 ~= 10^-4760 + 0xe4b75ae27bec50bfULL, 0x25b0419765fdfcdbULL, 0x915564d8ab057eeULL, 0xc8e31de056f89c19ULL, // x 2^-15626 ~= 10^-4704 + 0x548b1e80a94f3434ULL, 0xe418e9217ce83755ULL, 0x801e38463183fc88ULL, 0xccd1ffc6bba63e21ULL, // x 2^-15440 ~= 10^-4648 + 0x541950a0fdc2b4d9ULL, 0xeea173da1f0eb7b4ULL, 0xcfadf6b2aa7c4f43ULL, 0xd0d49859d60d40a3ULL, // x 2^-15254 ~= 10^-4592 + 0x7e64501be95ad76bULL, 0x451e855d8acef835ULL, 0x9e601e707a2c3488ULL, 0xd4eb4a687c0253e8ULL, // x 2^-15068 ~= 10^-4536 + 0xdadd9645f360cb51ULL, 0xf290163350ecb3ebULL, 0xa8edffdccfe4db4bULL, 0xd9167ab0c1965798ULL, // x 2^-14882 ~= 10^-4480 + 0x7e447db3018ffbdfULL, 0x4fa1860c08a85923ULL, 0xb17cd86e7fcece75ULL, 0xdd568fe9ab559344ULL, // x 2^-14696 ~= 10^-4424 + 0x61cd4655bf64d265ULL, 0xb19fd88fe285b3bcULL, 0x1151250681d59705ULL, 0xe1abf2cd11206610ULL, // x 2^-14510 ~= 10^-4368 + 0xa5703f5ce7a619ecULL, 0x361243a84b55574dULL, 0x25a8e1e5dbb41d6ULL, 0xe6170e21b2910457ULL, // x 2^-14324 ~= 10^-4312 + 0xb93897a6cf5d3e61ULL, 0x18746fcc6a190db9ULL, 0x66e849253e5da0c2ULL, 0xea984ec57de69f13ULL, // x 2^-14138 ~= 10^-4256 + 0x309043d12ab5b0acULL, 0x79c93cff11f09319ULL, 0xf5a7800f23ef67b8ULL, 0xef3023b80a732d93ULL, // x 2^-13952 ~= 10^-4200 + 0xa3baa84c049b52b9ULL, 0xbec466ee1b586342ULL, 0xe85fc7f4edbd3caULL, 0xf3defe25478e074aULL, // x 2^-13766 ~= 10^-4144 + 0xd1f4628316b15c7aULL, 0xae16192410d3135eULL, 0x4268a54f70bd28c4ULL, 0xf8a551706112897cULL, // x 2^-13580 ~= 10^-4088 + 0x9eb9296cc5749dbaULL, 0x48324e275376dfddULL, 0x5052e9289f0f2333ULL, 0xfd83933eda772c0bULL, // x 2^-13394 ~= 10^-4032 + 0xff6aae669a5a0d8aULL, 0x24fed95087b9006eULL, 0x1b02378a405b421ULL, 0x813d1dc1f0c754d6ULL, // x 2^-13207 ~= 10^-3976 + 0xf993f18de00dc89bULL, 0x15617da021b89f92ULL, 0xb782db1fc6aba49bULL, 0x83c4e245ed051dc1ULL, // x 2^-13021 ~= 10^-3920 + 0xc6a0d64a712172b1ULL, 0x2217669197ac1504ULL, 0x4250be2eeba87d15ULL, 0x86595584116caf3cULL, // x 2^-12835 ~= 10^-3864 + 0xbdc0c67a220687bULL, 0x44a66a6d6fd6537bULL, 0x3f1f93f1943ca9b6ULL, 0x88fab70d8b44952aULL, // x 2^-12649 ~= 10^-3808 + 0xb60b57164ad28122ULL, 0xde5bd4572c25a830ULL, 0x2c87f18b39478aa2ULL, 0x8ba947b223e5783eULL, // x 2^-12463 ~= 10^-3752 + 0xbd59568efdb9bfeeULL, 0x292f8f2c98d7f44cULL, 0x4054f5360249ebd1ULL, 0x8e6549867da7d11aULL, // x 2^-12277 ~= 10^-3696 + 0x9fa0721e66791accULL, 0x1789061d717d454cULL, 0xc1187fa0c18adbbeULL, 0x912effea7015b2c5ULL, // x 2^-12091 ~= 10^-3640 + 0x982b64e953ac4e27ULL, 0x45efb05f20cf48b3ULL, 0x4b4de34e0ebc3e06ULL, 0x9406af8f83fd6265ULL, // x 2^-11905 ~= 10^-3584 + 0xa53f5950eec21dcaULL, 0x3bd8754763bdbca1ULL, 0xac73f0226eff5ea1ULL, 
0x96ec9e7f9004839bULL, // x 2^-11719 ~= 10^-3528 + 0x320e19f88f1161b7ULL, 0x72e93fe0cce7cfd9ULL, 0x2184706ea46a4c38ULL, 0x99e11423765ec1d0ULL, // x 2^-11533 ~= 10^-3472 + 0x491aba48dfc0e36eULL, 0xd3de560ee34022b2ULL, 0xddadb80577b906bdULL, 0x9ce4594a044e0f1bULL, // x 2^-11347 ~= 10^-3416 + 0x6789d038697142fULL, 0x7a466a75be73db21ULL, 0x60dbd8aa443b560fULL, 0x9ff6b82ef415d222ULL, // x 2^-11161 ~= 10^-3360 + 0x40ed8056af76ac43ULL, 0x8251c601e346456ULL, 0x7401c6f091f87727ULL, 0xa3187c82120dace6ULL, // x 2^-10975 ~= 10^-3304 + 0x8c643ee307bffec6ULL, 0xf369a11c6f66c05aULL, 0x4d5b32f713d7f476ULL, 0xa649f36e8583e81aULL, // x 2^-10789 ~= 10^-3248 + 0xe32f5e080e36b4beULL, 0x3adf30ff2eb163d4ULL, 0xb4b39dd9ddb8d317ULL, 0xa98b6ba23e2300c7ULL, // x 2^-10603 ~= 10^-3192 + 0x6b9d538c192cfb1bULL, 0x1c5af3bd4d2c60b5ULL, 0xec41c1793d69d0d1ULL, 0xacdd3555869159d1ULL, // x 2^-10417 ~= 10^-3136 + 0x1adadaeedf7d699cULL, 0x71043692494aa743ULL, 0x3ca5a7540d9d56c9ULL, 0xb03fa252bd05a815ULL, // x 2^-10231 ~= 10^-3080 + 0xec3e4e5fc6b03617ULL, 0x47c9b16afe8fdf74ULL, 0x92e1bc1fbb33f18dULL, 0xb3b305fe328e571fULL, // x 2^-10045 ~= 10^-3024 + 0x1d42fa68b12bdb23ULL, 0xac46a7b3f2b4b34eULL, 0xa908fd4a88728b6aULL, 0xb737b55e31cdde04ULL, // x 2^-9859 ~= 10^-2968 + 0x887dede507f2b618ULL, 0x359a8fa0d014b9a7ULL, 0x7c4c65d15c614c56ULL, 0xbace07232df1c802ULL, // x 2^-9673 ~= 10^-2912 + 0x504708e718b4b669ULL, 0xfb4d9440822af452ULL, 0xef84cc99cb4c5d17ULL, 0xbe7653b01aae13e5ULL, // x 2^-9487 ~= 10^-2856 + 0x5b7977525516bff0ULL, 0x75913092420c9b35ULL, 0xcfc147ade4843a24ULL, 0xc230f522ee0a7fc2ULL, // x 2^-9301 ~= 10^-2800 + 0xad5d11883cc1302bULL, 0x860a754894b9a0bcULL, 0x4668677d5f46c29bULL, 0xc5fe475d4cd35cffULL, // x 2^-9115 ~= 10^-2744 + 0x42032f9f971bfc07ULL, 0x9fb576046ab35018ULL, 0x474b3cb1fe1d6a7fULL, 0xc9dea80d6283a34cULL, // x 2^-8929 ~= 10^-2688 + 0xd3e7fbb72403a4ddULL, 0x8ca223055819af54ULL, 0xd6ea3b733029ef0bULL, 0xcdd276b6e582284fULL, // x 2^-8743 ~= 10^-2632 + 0xba2431d885f2b7d9ULL, 0xc9879fc42869f610ULL, 0x3736730a9e47fef8ULL, 0xd1da14bc489025eaULL, // x 2^-8557 ~= 10^-2576 + 0xa11edbcd65dd1844ULL, 0xcb8edae81a295887ULL, 0x3d24e68dc1027246ULL, 0xd5f5e5681a4b9285ULL, // x 2^-8371 ~= 10^-2520 + 0xa0f076652f69ad08ULL, 0x9d19c341f5f42f2aULL, 0x742ab8f3864562c8ULL, 0xda264df693ac3e30ULL, // x 2^-8185 ~= 10^-2464 + 0x29f760ef115f2824ULL, 0xe0ee47c041c9de0fULL, 0x8c119f3680212413ULL, 0xde6bb59f56672cdaULL, // x 2^-7999 ~= 10^-2408 + 0x8b90230b3409c9d3ULL, 0x9d76eef2c1543e65ULL, 0x43190b523f872b9cULL, 0xe2c6859f5c284230ULL, // x 2^-7813 ~= 10^-2352 + 0xd44ce9993bc6611eULL, 0x777c9b2dfbede079ULL, 0x2a0969bf88679396ULL, 0xe7372943179706fcULL, // x 2^-7627 ~= 10^-2296 + 0xe8c5f5a63fd0fbd1ULL, 0xccc12293f1d7a58ULL, 0x131565be33dda91aULL, 0xebbe0df0c8201ac5ULL, // x 2^-7441 ~= 10^-2240 + 0xdb97988dd6b776f4ULL, 0xeb2106f435f7e1d5ULL, 0xccfb1cc2ef1f44deULL, 0xf05ba3330181c750ULL, // x 2^-7255 ~= 10^-2184 + 0x2fcbc8df94a1d54bULL, 0x796d0a8120801513ULL, 0x5f8385b3a882ff4cULL, 0xf5105ac3681f2716ULL, // x 2^-7069 ~= 10^-2128 + 0xc8700c11071a40f5ULL, 0x23cb9e9df9331fe4ULL, 0x166c15f456786c27ULL, 0xf9dca895a3226409ULL, // x 2^-6883 ~= 10^-2072 + 0x9589f4637a50cbb5ULL, 0xea8242b0030e4a51ULL, 0x6c656c3b1f2c9d91ULL, 0xfec102e2857bc1f9ULL, // x 2^-6697 ~= 10^-2016 + 0xc4be56c83349136cULL, 0x6188db81ac8e775dULL, 0xfa70b9a2ca60b004ULL, 0x81def119b76837c8ULL, // x 2^-6510 ~= 10^-1960 + 0xb85d39054658b363ULL, 0xe7df06bc613fda21ULL, 0x6a22490e8e9ec98bULL, 0x8469e0b6f2b8bd9bULL, // x 2^-6324 ~= 10^-1904 + 0x800b1e1349fef248ULL, 0x469cfd2e6ca32a77ULL, 
0x69138459b0fa72d4ULL, 0x87018eefb53c6325ULL, // x 2^-6138 ~= 10^-1848 + 0xb62593291c768919ULL, 0xc098e6ed0bfbd6f6ULL, 0x6c83ad1260ff20f4ULL, 0x89a63ba4c497b50eULL, // x 2^-5952 ~= 10^-1792 + 0x92ee7fce474479d3ULL, 0xe02017175bf040c6ULL, 0xd82ef2860273de8dULL, 0x8c5827f711735b46ULL, // x 2^-5766 ~= 10^-1736 + 0x7b0e6375ca8c77d9ULL, 0x5f07e1e10097d47fULL, 0x416d7f9ab1e67580ULL, 0x8f17964dfc3961f2ULL, // x 2^-5580 ~= 10^-1680 + 0xc8d869ed561af1ceULL, 0x8b6648e941de779bULL, 0x56700866b85d57feULL, 0x91e4ca5db93dbfecULL, // x 2^-5394 ~= 10^-1624 + 0xfc04df783488a410ULL, 0x64d1f15da2c146b1ULL, 0x43cf71d5c4fd7868ULL, 0x94c0092dd4ef9511ULL, // x 2^-5208 ~= 10^-1568 + 0xfbaf03b48a965a64ULL, 0x9b6122aa2b72a13cULL, 0x387898a6e22f821bULL, 0x97a9991fd8b3afc0ULL, // x 2^-5022 ~= 10^-1512 + 0x50f7f7c13119aaddULL, 0xe415d8b25694250aULL, 0x8f8857e875e7774eULL, 0x9aa1c1f6110c0dd0ULL, // x 2^-4836 ~= 10^-1456 + 0xce214403545fd685ULL, 0xf36d1ad779b90e09ULL, 0xa5c58d5f91a476d7ULL, 0x9da8ccda75b341b5ULL, // x 2^-4650 ~= 10^-1400 + 0x63ddfb68f971b0c5ULL, 0x2822e38faf74b26eULL, 0x6e1f7f1642ebaac8ULL, 0xa0bf0465b455e921ULL, // x 2^-4464 ~= 10^-1344 + 0xf0d00cec9daf7444ULL, 0x6bf3eea6f661a32aULL, 0xfad2be1679765f27ULL, 0xa3e4b4a65e97b76aULL, // x 2^-4278 ~= 10^-1288 + 0x463b4ab4bd478f57ULL, 0x6f6583b5b36d5426ULL, 0x800cfab80c4e2eb1ULL, 0xa71a2b283c14fba6ULL, // x 2^-4092 ~= 10^-1232 + 0xef163df2fa96e983ULL, 0xa825f32bc8f6b080ULL, 0x850b0c5976b21027ULL, 0xaa5fb6fbc115010bULL, // x 2^-3906 ~= 10^-1176 + 0x7db1b3f8e100eb43ULL, 0x2862b1f61d64ddc3ULL, 0x61363686961a41e5ULL, 0xadb5a8bdaaa53051ULL, // x 2^-3720 ~= 10^-1120 + 0xfd349cf00ba1e09aULL, 0x6d282fe1b7112879ULL, 0xc6f075c4b81fc72dULL, 0xb11c529ec0d87268ULL, // x 2^-3534 ~= 10^-1064 + 0xf7221741b221cf6fULL, 0x3739f15b06ac3c76ULL, 0xb4e4be5b6455ef96ULL, 0xb494086bbfea00c3ULL, // x 2^-3348 ~= 10^-1008 + 0xc4e5a2f864c403bbULL, 0x6e33cdcda4367276ULL, 0x24d256c540a50309ULL, 0xb81d1f9569068d8eULL, // x 2^-3162 ~= 10^-952 + 0x276e3f0f67f0553bULL, 0xde73d9d5be6974ULL, 0x6d4aa5b50bb5dc0dULL, 0xbbb7ef38bb827f2dULL, // x 2^-2976 ~= 10^-896 + 0x51a34a3e674484edULL, 0x1fb6069f8b26f840ULL, 0x925624c0d7d93317ULL, 0xbf64d0275747de70ULL, // x 2^-2790 ~= 10^-840 + 0xcc775c8cb6de1dbcULL, 0x6d60d02eac6309eeULL, 0x8e5a2e5116baf191ULL, 0xc3241cf0094a8e70ULL, // x 2^-2604 ~= 10^-784 + 0x6023c8fa17d7b105ULL, 0x69cf8f51d2e5e65ULL, 0xb0560c246f90e9e8ULL, 0xc6f631e782d57096ULL, // x 2^-2418 ~= 10^-728 + 0x92c17acb2d08d5fdULL, 0xc26ffb8e81532725ULL, 0x2ffff1289a804c5aULL, 0xcadb6d313c8736fcULL, // x 2^-2232 ~= 10^-672 + 0x47df78ab9e92897aULL, 0xc02b302a892b81dcULL, 0xa855e127113c887bULL, 0xced42ec885d9dbbeULL, // x 2^-2046 ~= 10^-616 + 0xdaf2dec03ec0c322ULL, 0x72db3bc15b0c7014ULL, 0xe00bad8dfc0d8c8eULL, 0xd2e0d889c213fd60ULL, // x 2^-1860 ~= 10^-560 + 0xd3a04799e4473ac8ULL, 0xa116409a2fdf1e9eULL, 0xc654d07271e6c39fULL, 0xd701ce3bd387bf47ULL, // x 2^-1674 ~= 10^-504 + 0x5c8a5dc65d745a24ULL, 0x2726c48a85389fa7ULL, 0x84c663cee6b86e7cULL, 0xdb377599b6074244ULL, // x 2^-1488 ~= 10^-448 + 0xd7ebc61ba77a9e66ULL, 0x8bf77d4bc59b35b1ULL, 0xcb285ceb2fed040dULL, 0xdf82365c497b5453ULL, // x 2^-1302 ~= 10^-392 + 0x744ce999bfed213aULL, 0x363b1f2c568dc3e2ULL, 0xfd1b1b2308169b25ULL, 0xe3e27a444d8d98b7ULL, // x 2^-1116 ~= 10^-336 + 0x6a40608fe10de7e7ULL, 0xf910f9f648232f14ULL, 0xd1b3400f8f9cff68ULL, 0xe858ad248f5c22c9ULL, // x 2^-930 ~= 10^-280 + 0x9bdbfc21260dd1adULL, 0x4609ac5c7899ca36ULL, 0xa4f8bf5635246428ULL, 0xece53cec4a314ebdULL, // x 2^-744 ~= 10^-224 + 0xd88181aad19d7454ULL, 0xf80f36174730ca34ULL, 
0xdc44e6c3cb279ac1ULL, 0xf18899b1bc3f8ca1ULL, // x 2^-558 ~= 10^-168 + 0xee19bfa6947f8e02ULL, 0xaa09501d5954a559ULL, 0x4d4617b5ff4a16d5ULL, 0xf64335bcf065d37dULL, // x 2^-372 ~= 10^-112 + 0xebbc75a03b4d60e6ULL, 0xac2e4f162cfad40aULL, 0xeed6e2f0f0d56712ULL, 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56 + 0x0ULL, 0x0ULL, 0x0ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly + 0x0ULL, 0x2000000000000000ULL, 0xbff8f10e7a8921a4ULL, 0x82818f1281ed449fULL, // x 2^187 == 10^56 exactly + 0x51775f71e92bf2f2ULL, 0x74a7ef0198791097ULL, 0x3e2cf6bc604ddb0ULL, 0x850fadc09923329eULL, // x 2^373 ~= 10^112 + 0xb204b3d9686f55b5ULL, 0xfb118fc9c217a1d2ULL, 0x90fb44d2f05d0842ULL, 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168 + 0xd7924bff833149faULL, 0xbc10c5c5cda97c8dULL, 0x82bd6b70d99aaa6fULL, 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224 + 0xa67d072d3c7fa14bULL, 0x7ec63730f500b406ULL, 0xdb0b487b6423e1e8ULL, 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280 + 0x546f2a35dc367e47ULL, 0x949063d8a46f0c0eULL, 0x213a4f0aa5e8a7b1ULL, 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336 + 0x50611a621c0ee3aeULL, 0x202d895116aa96beULL, 0x1c306f5d1b0b5fdfULL, 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392 + 0xffa6738a27dcf7a3ULL, 0x3c11d8430d5c4802ULL, 0xa7ea9c8838ce9437ULL, 0x957a4ae1ebf7f3d3ULL, // x 2^1489 ~= 10^448 + 0x5bf36c0f40bde99dULL, 0x284ba600ee9f6303ULL, 0xbf1d49cacccd5e68ULL, 0x9867806127ece4f4ULL, // x 2^1675 ~= 10^504 + 0xa6e937834ed12e58ULL, 0x73f26eb82f6b8066ULL, 0x655494c5c95d77f2ULL, 0x9b63610bb9243e46ULL, // x 2^1861 ~= 10^560 + 0xcd4b7660adc6930ULL, 0x8f868688f8eb79ebULL, 0x2e008393fd60b55ULL, 0x9e6e366733f85561ULL, // x 2^2047 ~= 10^616 + 0x3efb9807d86d3c6aULL, 0x84c10a1d22f5adc5ULL, 0x55e04dba4b3bd4ddULL, 0xa1884b69ade24964ULL, // x 2^2233 ~= 10^672 + 0xf065089401df33b4ULL, 0x1fc02370c451a755ULL, 0x44b222741eb1ebbfULL, 0xa4b1ec80f47c84adULL, // x 2^2419 ~= 10^728 + 0xa62d0da836fce7d5ULL, 0x75933380ceb5048cULL, 0x1cf4a5c3bc09fa6fULL, 0xa7eb6799e8aec999ULL, // x 2^2605 ~= 10^784 + 0x7a400df820f096c2ULL, 0x802c4085068d2dd5ULL, 0x3c4a575151b294dcULL, 0xab350c27feb90accULL, // x 2^2791 ~= 10^840 + 0xf48b51375df06e86ULL, 0x412fe9e72afd355eULL, 0x870a8d87239d8f35ULL, 0xae8f2b2ce3d5dbe9ULL, // x 2^2977 ~= 10^896 + 0x881883521930127cULL, 0xe53fd3fcb5b4df25ULL, 0xdd929f09c3eff5acULL, 0xb1fa17404a30e5e8ULL, // x 2^3163 ~= 10^952 + 0x270cd9f1348eb326ULL, 0x37ed82fe9c75fccfULL, 0x1931b583a9431d7eULL, 0xb5762497dbf17a9eULL, // x 2^3349 ~= 10^1008 + 0x8919b01a5b3d9ec1ULL, 0x6a7669bdfc6f699cULL, 0xe30db03e0f8dd286ULL, 0xb903a90f561d25e2ULL, // x 2^3535 ~= 10^1064 + 0xf0461526b4201aa5ULL, 0x7fe40defe17e55f5ULL, 0x9eb5cb19647508c5ULL, 0xbca2fc30cc19f090ULL, // x 2^3721 ~= 10^1120 + 0xd67bf35422978bbfULL, 0xdbb1c416ebe661fULL, 0x24bd4c00042ad125ULL, 0xc054773d149bf26bULL, // x 2^3907 ~= 10^1176 + 0xdd093192ef5508d0ULL, 0x6eac3085943ccc0fULL, 0x7ea30dbd7ea479e3ULL, 0xc418753460cdcca9ULL, // x 2^4093 ~= 10^1232 + 0xfe4ff20db6d25dc2ULL, 0x5d5d5a9519e34a42ULL, 0x764f4cf916b4deceULL, 0xc7ef52defe87b751ULL, // x 2^4279 ~= 10^1288 + 0xd8adfb2e00494c5eULL, 0x72435286baf0e84eULL, 0xbeb7fbdc1cbe8b37ULL, 0xcbd96ed6466cf081ULL, // x 2^4465 ~= 10^1344 + 0xe07c1e4384f594afULL, 0xc6b90b8874d5189ULL, 0xdce472c619aa3f63ULL, 0xcfd7298db6cb9672ULL, // x 2^4651 ~= 10^1400 + 0x5dd902c68fa448cfULL, 0xea8d16bd9544e48eULL, 0xe47defc14a406e4fULL, 0xd3e8e55c3c1f43d0ULL, // x 2^4837 ~= 10^1456 + 0x1223d79357bedca8ULL, 0xeae6c2843752ac35ULL, 0xb7157c60a24a0569ULL, 0xd80f0685a81b2a81ULL, // x 2^5023 ~= 10^1512 + 0xcff72d64bc79e429ULL, 0xccc52c236decd778ULL, 
0xfb0b98f6bbc4f0cbULL, 0xdc49f3445824e360ULL, // x 2^5209 ~= 10^1568 + 0x3731f76b905dffbbULL, 0x5e2bddd7d12a9e42ULL, 0xc6c6c1764e047e15ULL, 0xe09a13d30c2dba62ULL, // x 2^5395 ~= 10^1624 + 0xeb58d8ef2ada7c09ULL, 0xbc1a3b726b789947ULL, 0x87e8dcfc09dbc33aULL, 0xe4ffd276eedce658ULL, // x 2^5581 ~= 10^1680 + 0x249a5c06dc5d5db7ULL, 0xa8f09440be97bfe6ULL, 0xb1a3642a8da3cf4fULL, 0xe97b9b89d001dab3ULL, // x 2^5767 ~= 10^1736 + 0xbf34ff7963028cd9ULL, 0xc20578fa3851488bULL, 0x2d4070f33b21ab7bULL, 0xee0ddd84924ab88cULL, // x 2^5953 ~= 10^1792 + 0x2d0511317361d5ULL, 0xd6919e041129a1a7ULL, 0xa2bf0c63a814e04eULL, 0xf2b70909cd3fd35cULL, // x 2^6139 ~= 10^1848 + 0x1fa87f28acf1dcd2ULL, 0xe7a0a88981d1a0f9ULL, 0x8f13995cf9c2747ULL, 0xf77790f0a48a45ceULL, // x 2^6325 ~= 10^1904 + 0x1b6ff8afbe589b72ULL, 0xc851bb3f9aeb1211ULL, 0x7a37993eb21444faULL, 0xfc4fea4fd590b40aULL, // x 2^6511 ~= 10^1960 + 0xef23a4cbc039f0c2ULL, 0xbb3f8498a972f18eULL, 0xb7b1ada9cdeba84dULL, 0x80a046447e3d49f1ULL, // x 2^6698 ~= 10^2016 + 0x2cc44f2b602b6231ULL, 0xf231f4b7996b7278ULL, 0xcc6866c5d69b2cbULL, 0x8324f8aa08d7d411ULL, // x 2^6884 ~= 10^2072 + 0x822c97629a3a4c69ULL, 0x8a9afcdbc940e6f9ULL, 0x7fe2b4308dcbf1a3ULL, 0x85b64a659077660eULL, // x 2^7070 ~= 10^2128 + 0xf66cfcf42d4896b0ULL, 0x1f11852a20ed33c5ULL, 0x1d73ef3eaac3c964ULL, 0x88547abb1d8e5bd9ULL, // x 2^7256 ~= 10^2184 + 0x63093ad0caadb06cULL, 0x31be1482014cdaf0ULL, 0x1e34291b1ef566c7ULL, 0x8affca2bd1f88549ULL, // x 2^7442 ~= 10^2240 + 0xab50f69048738e9aULL, 0xa126c32ff4882be8ULL, 0x9e9383d73d486881ULL, 0x8db87a7c1e56d873ULL, // x 2^7628 ~= 10^2296 + 0xe57e659432b0a73eULL, 0x47a0e15dfc7986b8ULL, 0x9cc5ee51962c011aULL, 0x907eceba168949b3ULL, // x 2^7814 ~= 10^2352 + 0x8a6ff950599f8ae5ULL, 0xd1cbbb7d005a76d3ULL, 0x413407cfeeac9743ULL, 0x93530b43e5e2c129ULL, // x 2^8000 ~= 10^2408 + 0xd4e6b6e847550caaULL, 0x56a3106227b87706ULL, 0x7efa7d29c44e11b7ULL, 0x963575ce63b6332dULL, // x 2^8186 ~= 10^2464 + 0xd835c90b09842263ULL, 0xb69f01a641da2a42ULL, 0x5a848859645d1c6fULL, 0x9926556bc8defe43ULL, // x 2^8372 ~= 10^2520 + 0x9b0ae73c204ecd61ULL, 0x794fd5e5a51ac2fULL, 0x51edea897b34601fULL, 0x9c25f29286e9ddb6ULL, // x 2^8558 ~= 10^2576 + 0x3130484fb0a61d89ULL, 0x32b7105223a27365ULL, 0xb50008d92529e91fULL, 0x9f3497244186fca4ULL, // x 2^8744 ~= 10^2632 + 0x8cd036553f38a1e8ULL, 0x5e997e9f45d7897dULL, 0xf09e780bcc8238d9ULL, 0xa2528e74eaf101fcULL, // x 2^8930 ~= 10^2688 + 0xe1f8b43b08b5d0efULL, 0xa0eaf3f62dc1777cULL, 0x3a5828869701a165ULL, 0xa580255203f84b47ULL, // x 2^9116 ~= 10^2744 + 0x3c7f62e3154fa708ULL, 0x5786f3927eb15bd5ULL, 0x8b231a70eb5444ceULL, 0xa8bdaa0a0064fa44ULL, // x 2^9302 ~= 10^2800 + 0x1ebc24a19cd70a2aULL, 0x843fddd10c7006b8ULL, 0xfa1bde1f473556a4ULL, 0xac0b6c73d065f8ccULL, // x 2^9488 ~= 10^2856 + 0x46b6aae34cfd26fcULL, 0xdb7d919b136c68ULL, 0x7730e00421da4d55ULL, 0xaf69bdf68fc6a740ULL, // x 2^9674 ~= 10^2912 + 0x1c4edcb83fc4c49dULL, 0x61c0edd56bbcb3e8ULL, 0x7f959cb702329d14ULL, 0xb2d8f1915ba88ca5ULL, // x 2^9860 ~= 10^2968 + 0x428c840d247382feULL, 0x9cc3b1569b1325a4ULL, 0x40c3a071220f5567ULL, 0xb6595be34f821493ULL, // x 2^10046 ~= 10^3024 + 0xbeb82e734787ec63ULL, 0xbeff12280d5a1676ULL, 0x11c48d02b8326bd3ULL, 0xb9eb5333aa272e9bULL, // x 2^10232 ~= 10^3080 + 0x302349e12f45c73fULL, 0xb494bcc96d53e49cULL, 0x566765461bd2f61bULL, 0xbd8f2f7a1ba47d6dULL, // x 2^10418 ~= 10^3136 + 0x5704ebf5f16946ceULL, 0x431388ec68ac7a26ULL, 0xb889018e4f6e9a52ULL, 0xc1454a673cb9b1ceULL, // x 2^10604 ~= 10^3192 + 0x5a30431166af9b23ULL, 0x132d031fc1d1fec0ULL, 0xf85333a94848659fULL, 0xc50dff6d30c3aefcULL, 
// x 2^10790 ~= 10^3248 + 0x7573d4b3ffe4ba3bULL, 0xf888498a40220657ULL, 0x1a1aeae7cf8a9d3dULL, 0xc8e9abc872eb2bc1ULL, // x 2^10976 ~= 10^3304 + 0xb5eaef7441511eb9ULL, 0xc9cf998035a91664ULL, 0x12e29f09d9061609ULL, 0xccd8ae88cf70ad84ULL, // x 2^11162 ~= 10^3360 + 0x73aed4f1908f4d01ULL, 0x8c53e7beeca4578fULL, 0xdf7601457ca20b35ULL, 0xd0db689a89f2f9b1ULL, // x 2^11348 ~= 10^3416 + 0x5adbd55696e1cdd9ULL, 0x4949d09424b87626ULL, 0xcbdcd02f23cc7690ULL, 0xd4f23ccfb1916df5ULL, // x 2^11534 ~= 10^3472 + 0x3f500ccf4ea03593ULL, 0x9b80aac81b50762aULL, 0x44289dd21b589d7aULL, 0xd91d8fe9a3d019ccULL, // x 2^11720 ~= 10^3528 + 0x134ca67a679b84aeULL, 0x8909e424a112a3cdULL, 0x95aa118ec1d08317ULL, 0xdd5dc8a2bf27f3f7ULL, // x 2^11906 ~= 10^3584 + 0xe89e3cf733d9ff40ULL, 0x14344660a175c36ULL, 0x72c4d2cad73b0a7bULL, 0xe1b34fb846321d04ULL, // x 2^12092 ~= 10^3640 + 0x68c0a2c6c02dae9aULL, 0xb11160a6edb5f57ULL, 0xe20a88f1134f906dULL, 0xe61e8ff47461cda9ULL, // x 2^12278 ~= 10^3696 + 0x47fa54906741561aULL, 0xaa13acba1e5511f5ULL, 0xc7c91d5c341ed39dULL, 0xea9ff638c54554e1ULL, // x 2^12464 ~= 10^3752 + 0x365460ed91271c24ULL, 0xabe33496aff629b4ULL, 0xf659ede2159a45ecULL, 0xef37f1886f4b6690ULL, // x 2^12650 ~= 10^3808 + 0xe4cbf4acc7fba37fULL, 0x350e915f7055b1b8ULL, 0x78d946bab954b82fULL, 0xf3e6f313130ef0efULL, // x 2^12836 ~= 10^3864 + 0xe692accdfa5bd859ULL, 0xf4d4d3202379829eULL, 0xc9b1474d8f89c269ULL, 0xf8ad6e3fa030bd15ULL, // x 2^13022 ~= 10^3920 + 0xeca0018ea3b8d1b4ULL, 0xe878edb67072c26dULL, 0x6b1d2745340e7b14ULL, 0xfd8bd8b770cb469eULL, // x 2^13208 ~= 10^3976 + 0xce5fec949ab87cf7ULL, 0x151dcd7a53488c3ULL, 0xf22e502fcdd4bca2ULL, 0x81415538ce493bd5ULL, // x 2^13395 ~= 10^4032 + 0x5e1731fbff8c032eULL, 0xe752f53c2f8fa6c1ULL, 0x7c1735fc3b813c8cULL, 0x83c92edf425b292dULL, // x 2^13581 ~= 10^4088 + 0xb552102ea83f47e6ULL, 0xdf0fd2002ff6b3a3ULL, 0x367500a8e9a178fULL, 0x865db7a9ccd2839eULL, // x 2^13767 ~= 10^4144 + 0x76507bafe00ec873ULL, 0x71b256ecd954434cULL, 0xc9ac50475e25293aULL, 0x88ff2f2bade74531ULL, // x 2^13953 ~= 10^4200 + 0x5e2075ba289a360bULL, 0xac376f28b45e5accULL, 0x879b2e5f6ee8b1cULL, 0x8badd636cc48b341ULL, // x 2^14139 ~= 10^4256 + 0xab87d85e6311e801ULL, 0xb7f786d14d58173dULL, 0x2f33c652bd12fab7ULL, 0x8e69eee1f23f2be5ULL, // x 2^14325 ~= 10^4312 + 0x7fed9b68d77255beULL, 0x35dc241819de7182ULL, 0xad6a6308a8e8b557ULL, 0x9133bc8f2a130fe5ULL, // x 2^14511 ~= 10^4368 + 0x728ae72899d4bd12ULL, 0xe5413d9414142a55ULL, 0x9dbaa465efe141a0ULL, 0x940b83f23a55842aULL, // x 2^14697 ~= 10^4424 + 0xf7740145246fb8fULL, 0x186ef2c39acb4103ULL, 0x888c9ab2fc5b3437ULL, 0x96f18b1742aad751ULL, // x 2^14883 ~= 10^4480 + 0xd8bb0fba2183c6efULL, 0xbf66d66cc34f0197ULL, 0xba00864671d1053fULL, 0x99e6196979b978f1ULL, // x 2^15069 ~= 10^4536 + 0x9b71ed2ceb790e49ULL, 0x6faac32d59cc1f5dULL, 0x61d59d402aae4feaULL, 0x9ce977ba0ce3a0bdULL, // x 2^15255 ~= 10^4592 + 0xa0aa6d5e63991cfbULL, 0x19482fa0ac45669cULL, 0x803c1cd864033781ULL, 0x9ffbf04722750449ULL, // x 2^15441 ~= 10^4648 + 0x95a9949e04b8bff3ULL, 0x900aa3c2f02ac9d4ULL, 0xa28a151725a55e10ULL, 0xa31dcec2fef14b30ULL, // x 2^15627 ~= 10^4704 + 0x3acf9496dade0ce9ULL, 0xbd8ecf923d23bec0ULL, 0x5b8452af2302fe13ULL, 0xa64f605b4e3352cdULL, // x 2^15813 ~= 10^4760 + 0x6204425d2b58e822ULL, 0xdee162a8a1248550ULL, 0x82b84cabc828bf93ULL, 0xa990f3c09110c544ULL, // x 2^15999 ~= 10^4816 + 0x91a2658e0639f32ULL, 0x66fa2184cee0b861ULL, 0x8d29dd5122e4278dULL, 0xace2d92db0390b59ULL, // x 2^16185 ~= 10^4872 + 0x80acda113324758aULL, 0xded179c26d9ab828ULL, 0x58f8fde02c03a6c6ULL, 0xb045626fb50a35e7ULL, // x 2^16371 ~= 
10^4928 + 0x7128a8aad239ce8fULL, 0x8737bd250290cd5bULL, 0xd950102978dbd0ffULL, 0xb3b8e2eda91a232dULL, // x 2^16557 ~= 10^4984 +}; +#endif + +#if SWIFT_DTOA_BINARY32_SUPPORT +// Given a power `p`, this returns three values: +// * 64-bit fractions `lower` and `upper` +// * integer `exponent` +// +// The returned values satisty the following: +// ``` +// lower * 2^exponent <= 10^p <= upper * 2^exponent +// ``` +// +// Note: Max(*upper - *lower) = 3 +static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent) { + if (p >= 0) { + uint64_t base = powersOf10_Exact128[p * 2 + 1]; + *lower = base; + if (p < 28) { + *upper = base; + } else { + *upper = base + 1; + } + } else { + uint64_t base = powersOf10_negativeBinary32[p + 40]; + *lower = base; + *upper = base + 1; + } + *exponent = binaryExponentFor10ToThe(p); +} +#endif + +#if SWIFT_DTOA_BINARY64_SUPPORT +// Given a power `p`, this returns three values: +// * 128-bit fractions `lower` and `upper` +// * integer `exponent` +// +// Note: This function takes on average about 10% of the total runtime +// for formatting a double, as the general case here requires several +// multiplications to accurately reconstruct the lower and upper +// bounds. +// +// The returned values satisty the following: +// ``` +// lower * 2^exponent <= 10^p <= upper * 2^exponent +// ``` +// +// Note: Max(*upper - *lower) = 3 +static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent) { + if (p >= 0 && p <= 55) { + // Use one 64-bit exact value + swift_uint128_t exact; + initialize128WithHighLow64(exact, + powersOf10_Exact128[p * 2 + 1], + powersOf10_Exact128[p * 2]); + *upper = exact; + *lower = exact; + *exponent = binaryExponentFor10ToThe(p); + return; + } + + // Multiply a 128-bit approximate value with a 64-bit exact value + int index = p + 400; + // Copy a pair of uint64_t into a swift_uint128_t + int mainPower = index / 28; + const uint64_t *base_p = powersOf10_Binary64 + mainPower * 2; + swift_uint128_t base; + initialize128WithHighLow64(base, base_p[1], base_p[0]); + int extraPower = index - mainPower * 28; + int baseExponent = binaryExponentFor10ToThe(p - extraPower); + + int e = baseExponent; + if (extraPower == 0) { + // We're using a tightly-rounded lower bound, so +1 gives a tightly-rounded upper bound + *lower = base; +#if HAVE_UINT128_T + *upper = *lower + 1; +#else + *upper = *lower; + upper->low += 1; +#endif + } else { + // We need to multiply two values to get a lower bound + int64_t extra = powersOf10_Exact128[extraPower * 2 + 1]; + e += binaryExponentFor10ToThe(extraPower); + *lower = multiply128x64RoundingDown(base, extra); + // +2 is enough to get an upper bound + // (Verified through exhaustive testing.) +#if HAVE_UINT128_T + *upper = *lower + 2; +#else + *upper = *lower; + upper->low += 2; +#endif + } + *exponent = e; +} +#endif + +#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT +// As above, but returning 256-bit fractions suitable for +// converting float80/binary128. +static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent) { + if (p >= 0 && p <= 55) { + // We have an exact form, return a zero-width interval + // and avoid the multiplication. 
+ uint64_t exactLow = powersOf10_Exact128[p * 2]; + uint64_t exactHigh = powersOf10_Exact128[p * 2 + 1]; + initialize256WithHighMidLow64(*lower, exactHigh, exactLow, 0, 0); + *upper = *lower; + *exponent = binaryExponentFor10ToThe(p); + return; + } + + int index = p + 4984; + const uint64_t *base_p = powersOf10_Binary128 + (index / 56) * 4; + // The values in the table are always tightly rounded down, so we use that + // directly as a lower bound. + initialize256WithHighMidLow64(*lower, base_p[3], base_p[2], base_p[1], base_p[0]); + int extraPower = index % 56; + int e = binaryExponentFor10ToThe(p - extraPower); + + if (extraPower > 0) { + swift_uint128_t extra; + initialize128WithHighLow64(extra, + powersOf10_Exact128[extraPower * 2 + 1], + powersOf10_Exact128[extraPower * 2]); + multiply256x128RoundingDown(lower, extra); + e += binaryExponentFor10ToThe(extraPower); + } + // We could compute upper similar to lower using rounding-up + // multiplications, but this is faster. + // Since there's just one multiplication, we can prove that 2 is + // enough to get a true upper bound, and we've verified (through + // exhaustive testing) that the least-significant component never + // wraps. + *upper = *lower; +#if HAVE_UINT128_T + upper->low += 2; +#else + upper->elt[0] += 2; +#endif + + *exponent = e; +} +#endif diff --git a/src/3rd_party/swiftdtoa/SwiftDtoa.h b/src/3rd_party/swiftdtoa/SwiftDtoa.h new file mode 100644 index 00000000..b97e696f --- /dev/null +++ b/src/3rd_party/swiftdtoa/SwiftDtoa.h @@ -0,0 +1,286 @@ +//===--- SwiftDtoa.h ---------------------------------------------*- c -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2018, 2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===---------------------------------------------------------------------===// +// +/// About SwiftDtoa +/// =============== +/// +/// SwiftDtoa is the C implementation that supports the `.description` +/// and `.debugDescription` properties for the standard Swift +/// floating-point types. These functions produce the "optimal form" +/// for the binary floating point value. The optimal form is a +/// decimal representation that satisfies the following properties: +/// +/// 1. Accurate. Parsing the value back to a binary floating-point +/// value of the same precision will exactly yield the original +/// value. For example, `Double(d.description) == d` for all `Double` +/// values `d` (except for NaN values, of course). +/// +/// 2. Short. Of all accurate results, the returned value will +/// contain the minimum number of significant digits. Note that +/// this is not quite the same as C++ `to_chars` which promises the +/// minimal number of characters. +/// +/// 3. Close. Of all accurate, short results, the value printed will +/// be the one that is closest to the exact binary floating-point +/// value. +/// +/// The optimal form is the ideal textual form for use in JSON and +/// similar interchange formats because it is accurate, compact, and +/// can be generated very quickly. It is also ideal for logging and +/// debugging use; the accuracy guarantees that the result can be +/// cut-and-pasted to obtain the exact original value, and the +/// shortness property eliminates unnecessary digits that can be +/// confusing to readers. 
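+///
+/// For example, the binary64 value nearest to 0.1 is slightly larger
+/// than one tenth; a fixed 17-significant-digit rendering of it is
+/// "0.10000000000000001", while its optimal form is simply "0.1".
+/// Both parse back to exactly the same bits, but the optimal form
+/// carries no unnecessary digits.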
+/// +/// Algorithms that produce such output have been known since at least +/// 1990, when Steele and White published their Dragon4 algorithm. +/// However, the earliest algorithms required high-precision +/// arithmetic which limited their use. Starting in 2010 with the +/// publication of Grisu3, there has been a surge of interest and +/// there are now a number of algorithms that can produce optimal +/// forms very quickly. This particular implementation is loosely +/// based on Grisu2 but incorporates concepts from Errol and Ryu that +/// make it significantly faster and ensure accuracy in all cases. +/// +/// About SwiftDtoa v1 +/// ------------------ +/// +/// The first version of SwiftDtoa was committed to the Swift runtime +/// in 2018. It supported Swift's Float, Double, and Float80 formats. +/// +/// About SwiftDtoa v1a +/// ------------------- +/// +/// Version 1a of SwiftDtoa added support for Float16. +/// +/// About SwiftDtoa v2 +/// ------------------ +/// +/// Version 2 of SwiftDtoa is a major overhaul with a number of +/// algorithmic improvements to make it faster (especially for Float16 +/// and Float80), smaller, and more portable (the code only requires +/// C99 and makes no use of C or C++ floating-point facilities). It +/// also includes experimental support for IEEE 754 quad-precision +/// binary128 format, which is not currently supported by Swift. +// +//===---------------------------------------------------------------------===// + +#ifndef SWIFT_DTOA_H +#define SWIFT_DTOA_H + +#include +#include +#include +#include + +// +// IEEE 754 Binary16 support (also known as "half-precision") +// + +// Enable this by default. +// Force disable: -DSWIFT_DTOA_BINARY16_SUPPORT=0 +#ifndef SWIFT_DTOA_BINARY16_SUPPORT + #define SWIFT_DTOA_BINARY16_SUPPORT 1 +#endif + +// +// IEEE 754 Binary32 support (also known as "single-precision") +// + +// Does "float" on this system use binary32 format? +// (Almost all modern systems do this.) +#if (FLT_RADIX == 2) && (FLT_MANT_DIG == 24) && (FLT_MIN_EXP == -125) && (FLT_MAX_EXP == 128) + #define FLOAT_IS_BINARY32 1 +#else + #undef FLOAT_IS_BINARY32 +#endif + +// We can format binary32 values even if the local C environment +// does not support it. But `float` == binary32 almost everywhere, +// so we enable it by default. +// Force disable: -DSWIFT_DTOA_BINARY32_SUPPORT=0 +#ifndef SWIFT_DTOA_BINARY32_SUPPORT + #define SWIFT_DTOA_BINARY32_SUPPORT 1 +#endif + +// +// IEEE 754 Binary64 support (also known as "double-precision") +// + +// Does "double" on this system use binary64 format? +// (Almost all modern systems do this.) +#if (FLT_RADIX == 2) && (DBL_MANT_DIG == 53) && (DBL_MIN_EXP == -1021) && (DBL_MAX_EXP == 1024) + #define DOUBLE_IS_BINARY64 1 +#else + #undef DOUBLE_IS_BINARY64 +#endif + +// Does "long double" on this system use binary64 format? +// (Windows, for example.) +#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 53) && (LDBL_MIN_EXP == -1021) && (LDBL_MAX_EXP == 1024) + #define LONG_DOUBLE_IS_BINARY64 1 +#else + #undef LONG_DOUBLE_IS_BINARY64 +#endif + +// We can format binary64 values even if the local C environment +// does not support it. But `double` == binary64 almost everywhere, +// so we enable it by default. +// Force disable: -DSWIFT_DTOA_BINARY64_SUPPORT=0 +#ifndef SWIFT_DTOA_BINARY64_SUPPORT + #define SWIFT_DTOA_BINARY64_SUPPORT 1 +#endif + +// +// Intel x87 Float80 support +// + +// Is "long double" on this system the same as Float80? +// (macOS, Linux, and FreeBSD when running on x86 or x86_64 processors.) 
+#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 64) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384) + #define LONG_DOUBLE_IS_FLOAT80 1 +#else + #undef LONG_DOUBLE_IS_FLOAT80 +#endif + +// We can format float80 values even if the local C environment +// does not support it. However, by default, we only enable it for +// environments where float80 == long double. +// Force enable: -DSWIFT_DTOA_FLOAT80_SUPPORT=1 +// Force disable: -DSWIFT_DTOA_FLOAT80_SUPPORT=0 +#ifndef SWIFT_DTOA_FLOAT80_SUPPORT + #if LONG_DOUBLE_IS_FLOAT80 + #define SWIFT_DTOA_FLOAT80_SUPPORT 1 + #endif +#endif + +// +// IEEE 754 Binary128 support +// + +// Is "long double" on this system the same as Binary128? +// (Android on LP64 hardware.) +#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 113) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384) + #define LONG_DOUBLE_IS_BINARY128 1 +#else + #undef LONG_DOUBLE_IS_BINARY128 +#endif + +// We can format binary128 values even if the local C environment +// does not support it. However, by default, we only enable it for +// environments where binary128 == long double. +// Force enable: -DSWIFT_DTOA_BINARY128_SUPPORT=1 +// Force disable: -DSWIFT_DTOA_BINARY128_SUPPORT=0 +#ifndef SWIFT_DTOA_BINARY128_SUPPORT + #if LONG_DOUBLE_IS_BINARY128 + #define SWIFT_DTOA_BINARY128_SUPPORT 1 + #endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Format a floating point value as an ASCII string +// +// Input: +// * `d` is the number to be formatted +// * `dest` is a buffer of length `length` +// +// Ouput: +// * Return value is the length of the string placed into `dest` +// or zero if the buffer is too small. +// * For infinity, it copies "inf" or "-inf". +// * For NaN, it outputs a Swift-style detailed dump, including +// sign, signaling/quiet, and payload (if any). Typical output: +// "nan", "-nan", "-snan(0x1234)". +// * For zero, it outputs "0.0" or "-0.0" depending on the sign. +// * The destination buffer is always null-terminated (even on error) +// unless the length is zero. +// +// Note: If you want to customize the output for Infinity, zero, or +// Nan, you can easily write a wrapper function that uses `fpclassify` +// to identify those cases and only calls through to these functions +// for normal and subnormal values. +// +// Guarantees: +// +// * Accurate. If you parse the result back to the same floating-point +// format via an accurate algorithm (such as Clinger's algorithm), +// the resulting value will be _exactly_ equal to the original value. +// On most systems, this implies that using `strtod` to parse the +// output of `swift_dtoa_optimal_double` will yield exactly the +// original value. +// +// * Short. No other accurate result will have fewer digits. +// +// * Close. If there are multiple possible decimal forms that are +// both accurate and short, the form computed here will be +// closest to the original binary value. +// +// Naming: The `_p` forms take a `const void *` pointing to the value +// in memory. These forms do not require any support from the local C +// environment. In particular, they should work correctly even on +// systems with no floating-point support. Forms ending in a C +// floating-point type (e.g., "_float", "_double") are identical but +// take the corresponding argument type. These forms obviously +// require the C environment to support passing floating-point types as +// function arguments. 
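As a concrete illustration of the guarantees documented above, here is a minimal round-trip sketch (not part of the library) using swift_dtoa_optimal_double, which is declared just below and available when double is binary64. The 64-byte buffer is an arbitrary, generous size chosen for the sketch, and the bit-exact round trip is expected only for finite values on systems whose strtod is correctly rounded, as noted above. For intuition about the "short" guarantee: the binary64 value nearest 0.1 is exactly 0.1000000000000000055511151231257827021181583404541015625, yet its optimal form is simply "0.1", the shortest decimal that parses back to the same bits.

#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "SwiftDtoa.h"

// Minimal round-trip check: format a finite double, then parse the text back.
static void swift_dtoa_roundtrip_example(double d)
{
	char buf[64]; // arbitrary but ample size for a binary64 result in this sketch
	size_t n = swift_dtoa_optimal_double(d, buf, sizeof(buf));
	assert(n > 0); // a return of zero would mean the buffer was too small
	double parsed = strtod(buf, NULL); // e.g., 0.1 comes back from the text "0.1"
	assert(memcmp(&parsed, &d, sizeof(d)) == 0); // identical bits recovered for finite inputs
}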
+ +#if SWIFT_DTOA_BINARY16_SUPPORT +size_t swift_dtoa_optimal_binary16_p(const void *, char *dest, size_t length); +#endif + +#if SWIFT_DTOA_BINARY32_SUPPORT +size_t swift_dtoa_optimal_binary32_p(const void *, char *dest, size_t length); +#if FLOAT_IS_BINARY32 +// If `float` happens to be binary32, define the convenience wrapper. +size_t swift_dtoa_optimal_float(float, char *dest, size_t length); +#endif +#endif + +#if SWIFT_DTOA_BINARY64_SUPPORT +size_t swift_dtoa_optimal_binary64_p(const void *, char *dest, size_t length); +#if DOUBLE_IS_BINARY64 +// If `double` happens to be binary64, define the convenience wrapper. +size_t swift_dtoa_optimal_double(double, char *dest, size_t length); +#endif +#if LONG_DOUBLE_IS_BINARY64 +// If `long double` happens to be binary64, define the convenience wrapper. +size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); +#endif +#endif + +#if SWIFT_DTOA_FLOAT80_SUPPORT +// Universal entry point works on all platforms, regardless of +// whether the local system has direct support for float80 +size_t swift_dtoa_optimal_float80_p(const void *, char *dest, size_t length); +#if LONG_DOUBLE_IS_FLOAT80 +// If 'long double' happens to be float80, define a convenience wrapper. +size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); +#endif +#endif + +#if SWIFT_DTOA_BINARY128_SUPPORT +// Universal entry point works on all platforms, regardless of +// whether the local system has direct support for float80 +size_t swift_dtoa_optimal_binary128_p(const void *, char *dest, size_t length); +#if LONG_DOUBLE_IS_BINARY128 +// If 'long double' happens to be binary128, define a convenience wrapper. +size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); +#endif +#endif + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/3rd_party/tweetnacl/tweetnacl.cpp b/src/3rd_party/tweetnacl/tweetnacl.cpp new file mode 100644 index 00000000..7af351bc --- /dev/null +++ b/src/3rd_party/tweetnacl/tweetnacl.cpp @@ -0,0 +1,827 @@ +#pragma warning(disable: 4068) +#pragma warning(disable: 4018) +#pragma warning(disable: 4146) +#pragma warning(disable: 4244) +#pragma GCC diagnostic ignored "-Wsign-compare" + +//Need to add this to make everything work +#ifdef __cplusplus +extern "C" { +#endif + +#include "tweetnacl.h" +#define FOR(i,n) for (i = 0;i < n;++i) +#define sv static void + +typedef unsigned char u8; +typedef unsigned long u32; +typedef unsigned long long u64; +typedef long long i64; +typedef i64 gf[16]; +extern void randombytes(u8 *,u64); + +static const u8 + _0[16] = {0}, + _9[32] = {9}; +static const gf + gf0 = {0}, + gf1 = {1}, + _121665 = {0xDB41,1}, + D = {0x78a3, 0x1359, 0x4dca, 0x75eb, 0xd8ab, 0x4141, 0x0a4d, 0x0070, 0xe898, 0x7779, 0x4079, 0x8cc7, 0xfe73, 0x2b6f, 0x6cee, 0x5203}, + D2 = {0xf159, 0x26b2, 0x9b94, 0xebd6, 0xb156, 0x8283, 0x149a, 0x00e0, 0xd130, 0xeef3, 0x80f2, 0x198e, 0xfce7, 0x56df, 0xd9dc, 0x2406}, + X = {0xd51a, 0x8f25, 0x2d60, 0xc956, 0xa7b2, 0x9525, 0xc760, 0x692c, 0xdc5c, 0xfdd6, 0xe231, 0xc0a4, 0x53fe, 0xcd6e, 0x36d3, 0x2169}, + Y = {0x6658, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666, 0x6666}, + I = {0xa0b0, 0x4a0e, 0x1b27, 0xc4ee, 0xe478, 0xad2f, 0x1806, 0x2f43, 0xd7a7, 0x3dfb, 0x0099, 0x2b4d, 0xdf0b, 0x4fc1, 0x2480, 0x2b83}; + +static u32 L32(u32 x,int c) { return (x << c) | ((x&0xffffffff) >> (32 - c)); } + +static u32 ld32(const u8 *x) +{ + u32 u = x[3]; + u = (u<<8)|x[2]; + u = (u<<8)|x[1]; + return 
(u<<8)|x[0]; +} + +static u64 dl64(const u8 *x) +{ + u64 i,u=0; + FOR(i,8) u=(u<<8)|x[i]; + return u; +} + +sv st32(u8 *x,u32 u) +{ + int i; + FOR(i,4) { x[i] = u; u >>= 8; } +} + +sv ts64(u8 *x,u64 u) +{ + int i; + for (i = 7;i >= 0;--i) { x[i] = u; u >>= 8; } +} + +static int vn(const u8 *x,const u8 *y,int n) +{ + u32 i,d = 0; + FOR(i,n) d |= x[i]^y[i]; + return (1 & ((d - 1) >> 8)) - 1; +} + +int crypto_verify_16(const u8 *x,const u8 *y) +{ + return vn(x,y,16); +} + +int crypto_verify_32(const u8 *x,const u8 *y) +{ + return vn(x,y,32); +} + +sv core(u8 *out,const u8 *in,const u8 *k,const u8 *c,int h) +{ + u32 w[16],x[16],y[16],t[4]; + int i,j,m; + + FOR(i,4) { + x[5*i] = ld32(c+4*i); + x[1+i] = ld32(k+4*i); + x[6+i] = ld32(in+4*i); + x[11+i] = ld32(k+16+4*i); + } + + FOR(i,16) y[i] = x[i]; + + FOR(i,20) { + FOR(j,4) { + FOR(m,4) t[m] = x[(5*j+4*m)%16]; + t[1] ^= L32(t[0]+t[3], 7); + t[2] ^= L32(t[1]+t[0], 9); + t[3] ^= L32(t[2]+t[1],13); + t[0] ^= L32(t[3]+t[2],18); + FOR(m,4) w[4*j+(j+m)%4] = t[m]; + } + FOR(m,16) x[m] = w[m]; + } + + if (h) { + FOR(i,16) x[i] += y[i]; + FOR(i,4) { + x[5*i] -= ld32(c+4*i); + x[6+i] -= ld32(in+4*i); + } + FOR(i,4) { + st32(out+4*i,x[5*i]); + st32(out+16+4*i,x[6+i]); + } + } else + FOR(i,16) st32(out + 4 * i,x[i] + y[i]); +} + +int crypto_core_salsa20(u8 *out,const u8 *in,const u8 *k,const u8 *c) +{ + core(out,in,k,c,0); + return 0; +} + +int crypto_core_hsalsa20(u8 *out,const u8 *in,const u8 *k,const u8 *c) +{ + core(out,in,k,c,1); + return 0; +} + +//modified from original due to compilation issues on some compilers +//static const u8 sigma[16] = "expand 32-byte k"; +static const u8 sigma[16] = { 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' }; + +int crypto_stream_salsa20_xor(u8 *c,const u8 *m,u64 b,const u8 *n,const u8 *k) +{ + u8 z[16],x[64]; + u32 u,i; + if (!b) return 0; + FOR(i,16) z[i] = 0; + FOR(i,8) z[i] = n[i]; + while (b >= 64) { + crypto_core_salsa20(x,z,k,sigma); + FOR(i,64) c[i] = (m?m[i]:0) ^ x[i]; + u = 1; + for (i = 8;i < 16;++i) { + u += (u32) z[i]; + z[i] = u; + u >>= 8; + } + b -= 64; + c += 64; + if (m) m += 64; + } + if (b) { + crypto_core_salsa20(x,z,k,sigma); + FOR(i,b) c[i] = (m?m[i]:0) ^ x[i]; + } + return 0; +} + +int crypto_stream_salsa20(u8 *c,u64 d,const u8 *n,const u8 *k) +{ + return crypto_stream_salsa20_xor(c,0,d,n,k); +} + +int crypto_stream(u8 *c,u64 d,const u8 *n,const u8 *k) +{ + u8 s[32]; + crypto_core_hsalsa20(s,n,k,sigma); + return crypto_stream_salsa20(c,d,n+16,s); +} + +int crypto_stream_xor(u8 *c,const u8 *m,u64 d,const u8 *n,const u8 *k) +{ + u8 s[32]; + crypto_core_hsalsa20(s,n,k,sigma); + return crypto_stream_salsa20_xor(c,m,d,n+16,s); +} + +sv add1305(u32 *h,const u32 *c) +{ + u32 j,u = 0; + FOR(j,17) { + u += h[j] + c[j]; + h[j] = u & 255; + u >>= 8; + } +} + +static const u32 minusp[17] = { + 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 +} ; + +int crypto_onetimeauth(u8 *out,const u8 *m,u64 n,const u8 *k) +{ + u32 s,i,j,u,x[17],r[17],h[17],c[17],g[17]; + + FOR(j,17) r[j]=h[j]=0; + FOR(j,16) r[j]=k[j]; + r[3]&=15; + r[4]&=252; + r[7]&=15; + r[8]&=252; + r[11]&=15; + r[12]&=252; + r[15]&=15; + + while (n > 0) { + FOR(j,17) c[j] = 0; + for (j = 0;(j < 16) && (j < n);++j) c[j] = m[j]; + c[j] = 1; + m += j; n -= j; + add1305(h,c); + FOR(i,17) { + x[i] = 0; + FOR(j,17) x[i] += h[j] * ((j <= i) ? 
r[i - j] : 320 * r[i + 17 - j]); + } + FOR(i,17) h[i] = x[i]; + u = 0; + FOR(j,16) { + u += h[j]; + h[j] = u & 255; + u >>= 8; + } + u += h[16]; h[16] = u & 3; + u = 5 * (u >> 2); + FOR(j,16) { + u += h[j]; + h[j] = u & 255; + u >>= 8; + } + u += h[16]; h[16] = u; + } + + FOR(j,17) g[j] = h[j]; + add1305(h,minusp); + s = -(h[16] >> 7); + FOR(j,17) h[j] ^= s & (g[j] ^ h[j]); + + FOR(j,16) c[j] = k[j + 16]; + c[16] = 0; + add1305(h,c); + FOR(j,16) out[j] = h[j]; + return 0; +} + +int crypto_onetimeauth_verify(const u8 *h,const u8 *m,u64 n,const u8 *k) +{ + u8 x[16]; + crypto_onetimeauth(x,m,n,k); + return crypto_verify_16(h,x); +} + +int crypto_secretbox(u8 *c,const u8 *m,u64 d,const u8 *n,const u8 *k) +{ + int i; + if (d < 32) return -1; + crypto_stream_xor(c,m,d,n,k); + crypto_onetimeauth(c + 16,c + 32,d - 32,c); + FOR(i,16) c[i] = 0; + return 0; +} + +int crypto_secretbox_open(u8 *m,const u8 *c,u64 d,const u8 *n,const u8 *k) +{ + int i; + u8 x[32]; + if (d < 32) return -1; + crypto_stream(x,32,n,k); + if (crypto_onetimeauth_verify(c + 16,c + 32,d - 32,x) != 0) return -1; + crypto_stream_xor(m,c,d,n,k); + FOR(i,32) m[i] = 0; + return 0; +} + +sv set25519(gf r, const gf a) +{ + int i; + FOR(i,16) r[i]=a[i]; +} + +sv car25519(gf o) +{ + int i; + i64 c; + FOR(i,16) { + o[i]+=(1LL<<16); + c=o[i]>>16; + o[(i+1)*(i<15)]+=c-1+37*(c-1)*(i==15); + o[i]-=c<<16; + } +} + +sv sel25519(gf p,gf q,int b) +{ + i64 t,i,c=~(b-1); + FOR(i,16) { + t= c&(p[i]^q[i]); + p[i]^=t; + q[i]^=t; + } +} + +sv pack25519(u8 *o,const gf n) +{ + int i,j,b; + gf m,t; + FOR(i,16) t[i]=n[i]; + car25519(t); + car25519(t); + car25519(t); + FOR(j,2) { + m[0]=t[0]-0xffed; + for(i=1;i<15;i++) { + m[i]=t[i]-0xffff-((m[i-1]>>16)&1); + m[i-1]&=0xffff; + } + m[15]=t[15]-0x7fff-((m[14]>>16)&1); + b=(m[15]>>16)&1; + m[14]&=0xffff; + sel25519(t,m,1-b); + } + FOR(i,16) { + o[2*i]=t[i]&0xff; + o[2*i+1]=t[i]>>8; + } +} + +static int neq25519(const gf a, const gf b) +{ + u8 c[32],d[32]; + pack25519(c,a); + pack25519(d,b); + return crypto_verify_32(c,d); +} + +static u8 par25519(const gf a) +{ + u8 d[32]; + pack25519(d,a); + return d[0]&1; +} + +sv unpack25519(gf o, const u8 *n) +{ + int i; + FOR(i,16) o[i]=n[2*i]+((i64)n[2*i+1]<<8); + o[15]&=0x7fff; +} + +sv A(gf o,const gf a,const gf b) +{ + int i; + FOR(i,16) o[i]=a[i]+b[i]; +} + +sv Z(gf o,const gf a,const gf b) +{ + int i; + FOR(i,16) o[i]=a[i]-b[i]; +} + +sv M(gf o,const gf a,const gf b) +{ + i64 i,j,t[31]; + FOR(i,31) t[i]=0; + FOR(i,16) FOR(j,16) t[i+j]+=a[i]*b[j]; + FOR(i,15) t[i]+=38*t[i+16]; + FOR(i,16) o[i]=t[i]; + car25519(o); + car25519(o); +} + +sv S(gf o,const gf a) +{ + M(o,a,a); +} + +sv inv25519(gf o,const gf i) +{ + gf c; + int a; + FOR(a,16) c[a]=i[a]; + for(a=253;a>=0;a--) { + S(c,c); + if(a!=2&&a!=4) M(c,c,i); + } + FOR(a,16) o[a]=c[a]; +} + +sv pow2523(gf o,const gf i) +{ + gf c; + int a; + FOR(a,16) c[a]=i[a]; + for(a=250;a>=0;a--) { + S(c,c); + if(a!=1) M(c,c,i); + } + FOR(a,16) o[a]=c[a]; +} + +int crypto_scalarmult(u8 *q,const u8 *n,const u8 *p) +{ + u8 z[32]; + i64 x[80],r,i; + gf a,b,c,d,e,f; + FOR(i,31) z[i]=n[i]; + z[31]=(n[31]&127)|64; + z[0]&=248; + unpack25519(x,p); + FOR(i,16) { + b[i]=x[i]; + d[i]=a[i]=c[i]=0; + } + a[0]=d[0]=1; + for(i=254;i>=0;--i) { + r=(z[i>>3]>>(i&7))&1; + sel25519(a,b,r); + sel25519(c,d,r); + A(e,a,c); + Z(a,a,c); + A(c,b,d); + Z(b,b,d); + S(d,e); + S(f,a); + M(a,c,a); + M(c,b,e); + A(e,a,c); + Z(a,a,c); + S(b,a); + Z(c,d,f); + M(a,c,_121665); + A(a,a,d); + M(c,c,a); + M(a,d,f); + M(d,b,x); + S(b,e); + sel25519(a,b,r); + 
sel25519(c,d,r); + } + FOR(i,16) { + x[i+16]=a[i]; + x[i+32]=c[i]; + x[i+48]=b[i]; + x[i+64]=d[i]; + } + inv25519(x+32,x+32); + M(x+16,x+16,x+32); + pack25519(q,x+16); + return 0; +} + +int crypto_scalarmult_base(u8 *q,const u8 *n) +{ + return crypto_scalarmult(q,n,_9); +} + +int crypto_box_keypair(u8 *y,u8 *x) +{ + randombytes(x,32); + return crypto_scalarmult_base(y,x); +} + +int crypto_box_beforenm(u8 *k,const u8 *y,const u8 *x) +{ + u8 s[32]; + crypto_scalarmult(s,x,y); + return crypto_core_hsalsa20(k,_0,s,sigma); +} + +int crypto_box_afternm(u8 *c,const u8 *m,u64 d,const u8 *n,const u8 *k) +{ + return crypto_secretbox(c,m,d,n,k); +} + +int crypto_box_open_afternm(u8 *m,const u8 *c,u64 d,const u8 *n,const u8 *k) +{ + return crypto_secretbox_open(m,c,d,n,k); +} + +int crypto_box(u8 *c,const u8 *m,u64 d,const u8 *n,const u8 *y,const u8 *x) +{ + u8 k[32]; + crypto_box_beforenm(k,y,x); + return crypto_box_afternm(c,m,d,n,k); +} + +int crypto_box_open(u8 *m,const u8 *c,u64 d,const u8 *n,const u8 *y,const u8 *x) +{ + u8 k[32]; + crypto_box_beforenm(k,y,x); + return crypto_box_open_afternm(m,c,d,n,k); +} + +static u64 R(u64 x,int c) { return (x >> c) | (x << (64 - c)); } +static u64 Ch(u64 x,u64 y,u64 z) { return (x & y) ^ (~x & z); } +static u64 Maj(u64 x,u64 y,u64 z) { return (x & y) ^ (x & z) ^ (y & z); } +static u64 Sigma0(u64 x) { return R(x,28) ^ R(x,34) ^ R(x,39); } +static u64 Sigma1(u64 x) { return R(x,14) ^ R(x,18) ^ R(x,41); } +static u64 sigma0(u64 x) { return R(x, 1) ^ R(x, 8) ^ (x >> 7); } +static u64 sigma1(u64 x) { return R(x,19) ^ R(x,61) ^ (x >> 6); } + +static const u64 K[80] = +{ + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + +int crypto_hashblocks(u8 *x,const u8 *m,u64 
n) +{ + u64 z[8],b[8],a[8],w[16],t; + int i,j; + + FOR(i,8) z[i] = a[i] = dl64(x + 8 * i); + + while (n >= 128) { + FOR(i,16) w[i] = dl64(m + 8 * i); + + FOR(i,80) { + FOR(j,8) b[j] = a[j]; + t = a[7] + Sigma1(a[4]) + Ch(a[4],a[5],a[6]) + K[i] + w[i%16]; + b[7] = t + Sigma0(a[0]) + Maj(a[0],a[1],a[2]); + b[3] += t; + FOR(j,8) a[(j+1)%8] = b[j]; + if (i%16 == 15) + FOR(j,16) + w[j] += w[(j+9)%16] + sigma0(w[(j+1)%16]) + sigma1(w[(j+14)%16]); + } + + FOR(i,8) { a[i] += z[i]; z[i] = a[i]; } + + m += 128; + n -= 128; + } + + FOR(i,8) ts64(x+8*i,z[i]); + + return n; +} + +static const u8 iv[64] = { + 0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08, + 0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b, + 0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b, + 0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1, + 0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1, + 0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f, + 0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b, + 0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79 +} ; + +int crypto_hash(u8 *out,const u8 *m,u64 n) +{ + u8 h[64],x[256]; + u64 i,b = n; + + FOR(i,64) h[i] = iv[i]; + + crypto_hashblocks(h,m,n); + m += n; + n &= 127; + m -= n; + + FOR(i,256) x[i] = 0; + FOR(i,n) x[i] = m[i]; + x[n] = 128; + + n = 256-128*(n<112); + x[n-9] = b >> 61; + ts64(x+n-8,b<<3); + crypto_hashblocks(h,x,n); + + FOR(i,64) out[i] = h[i]; + + return 0; +} + +sv add(gf p[4],gf q[4]) +{ + gf a,b,c,d,t,e,f,g,h; + + Z(a, p[1], p[0]); + Z(t, q[1], q[0]); + M(a, a, t); + A(b, p[0], p[1]); + A(t, q[0], q[1]); + M(b, b, t); + M(c, p[3], q[3]); + M(c, c, D2); + M(d, p[2], q[2]); + A(d, d, d); + Z(e, b, a); + Z(f, d, c); + A(g, d, c); + A(h, b, a); + + M(p[0], e, f); + M(p[1], h, g); + M(p[2], g, f); + M(p[3], e, h); +} + +sv cswap(gf p[4],gf q[4],u8 b) +{ + int i; + FOR(i,4) + sel25519(p[i],q[i],b); +} + +sv pack(u8 *r,gf p[4]) +{ + gf tx, ty, zi; + inv25519(zi, p[2]); + M(tx, p[0], zi); + M(ty, p[1], zi); + pack25519(r, ty); + r[31] ^= par25519(tx) << 7; +} + +sv scalarmult(gf p[4],gf q[4],const u8 *s) +{ + int i; + set25519(p[0],gf0); + set25519(p[1],gf1); + set25519(p[2],gf1); + set25519(p[3],gf0); + for (i = 255;i >= 0;--i) { + u8 b = (s[i/8]>>(i&7))&1; + cswap(p,q,b); + add(q,p); + add(p,p); + cswap(p,q,b); + } +} + +sv scalarbase(gf p[4],const u8 *s) +{ + gf q[4]; + set25519(q[0],X); + set25519(q[1],Y); + set25519(q[2],gf1); + M(q[3],X,Y); + scalarmult(p,q,s); +} + +int crypto_sign_keypair(u8 *pk, u8 *sk) +{ + u8 d[64]; + gf p[4]; + int i; + + randombytes(sk, 32); + crypto_hash(d, sk, 32); + d[0] &= 248; + d[31] &= 127; + d[31] |= 64; + + scalarbase(p,d); + pack(pk,p); + + FOR(i,32) sk[32 + i] = pk[i]; + return 0; +} + +static const u64 L[32] = {0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7, 0xa2, 0xde, 0xf9, 0xde, 0x14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x10}; + +sv modL(u8 *r,i64 x[64]) +{ + i64 carry,i,j; + for (i = 63;i >= 32;--i) { + carry = 0; + for (j = i - 32;j < i - 12;++j) { + x[j] += carry - 16 * x[i] * L[j - (i - 32)]; + carry = (x[j] + 128) >> 8; + x[j] -= carry << 8; + } + x[j] += carry; + x[i] = 0; + } + carry = 0; + FOR(j,32) { + x[j] += carry - (x[31] >> 4) * L[j]; + carry = x[j] >> 8; + x[j] &= 255; + } + FOR(j,32) x[j] -= carry * L[j]; + FOR(i,32) { + x[i+1] += x[i] >> 8; + r[i] = x[i] & 255; + } +} + +sv reduce(u8 *r) +{ + i64 x[64],i; + FOR(i,64) x[i] = (u64) r[i]; + FOR(i,64) r[i] = 0; + modL(r,x); +} + +int crypto_sign(u8 *sm,u64 *smlen,const u8 *m,u64 n,const u8 *sk) +{ + u8 d[64],h[64],r[64]; + i64 i,j,x[64]; + gf p[4]; + + crypto_hash(d, sk, 32); + d[0] &= 248; + d[31] &= 127; + d[31] |= 
64; + + *smlen = n+64; + FOR(i,n) sm[64 + i] = m[i]; + FOR(i,32) sm[32 + i] = d[32 + i]; + + crypto_hash(r, sm+32, n+32); + reduce(r); + scalarbase(p,r); + pack(sm,p); + + FOR(i,32) sm[i+32] = sk[i+32]; + crypto_hash(h,sm,n + 64); + reduce(h); + + FOR(i,64) x[i] = 0; + FOR(i,32) x[i] = (u64) r[i]; + FOR(i,32) FOR(j,32) x[i+j] += h[i] * (u64) d[j]; + modL(sm + 32,x); + + return 0; +} + +static int unpackneg(gf r[4],const u8 p[32]) +{ + gf t, chk, num, den, den2, den4, den6; + set25519(r[2],gf1); + unpack25519(r[1],p); + S(num,r[1]); + M(den,num,D); + Z(num,num,r[2]); + A(den,r[2],den); + + S(den2,den); + S(den4,den2); + M(den6,den4,den2); + M(t,den6,num); + M(t,t,den); + + pow2523(t,t); + M(t,t,num); + M(t,t,den); + M(t,t,den); + M(r[0],t,den); + + S(chk,r[0]); + M(chk,chk,den); + if (neq25519(chk, num)) M(r[0],r[0],I); + + S(chk,r[0]); + M(chk,chk,den); + if (neq25519(chk, num)) return -1; + + if (par25519(r[0]) == (p[31]>>7)) Z(r[0],gf0,r[0]); + + M(r[3],r[0],r[1]); + return 0; +} + +int crypto_sign_open(u8 *m,u64 *mlen,const u8 *sm,u64 n,const u8 *pk) +{ + int i; + u8 t[32],h[64]; + gf p[4],q[4]; + + *mlen = -1; + if (n < 64) return -1; + + if (unpackneg(q,pk)) return -1; + + FOR(i,n) m[i] = sm[i]; + FOR(i,32) m[i+32] = pk[i]; + crypto_hash(h,m,n); + reduce(h); + scalarmult(p,q,h); + + scalarbase(q,sm + 32); + add(p,q); + pack(t,p); + + n -= 64; + if (crypto_verify_32(sm, t)) { + FOR(i,n) m[i] = 0; + return -1; + } + + FOR(i,n) m[i] = sm[i + 64]; + *mlen = n; + return 0; +} + +//need to add this to make everything work correctly +#ifdef __cplusplus +} +#endif diff --git a/src/3rd_party/tweetnacl/tweetnacl.h b/src/3rd_party/tweetnacl/tweetnacl.h new file mode 100644 index 00000000..9277fbf8 --- /dev/null +++ b/src/3rd_party/tweetnacl/tweetnacl.h @@ -0,0 +1,272 @@ +#ifndef TWEETNACL_H +#define TWEETNACL_H +#define crypto_auth_PRIMITIVE "hmacsha512256" +#define crypto_auth crypto_auth_hmacsha512256 +#define crypto_auth_verify crypto_auth_hmacsha512256_verify +#define crypto_auth_BYTES crypto_auth_hmacsha512256_BYTES +#define crypto_auth_KEYBYTES crypto_auth_hmacsha512256_KEYBYTES +#define crypto_auth_IMPLEMENTATION crypto_auth_hmacsha512256_IMPLEMENTATION +#define crypto_auth_VERSION crypto_auth_hmacsha512256_VERSION +#define crypto_auth_hmacsha512256_tweet_BYTES 32 +#define crypto_auth_hmacsha512256_tweet_KEYBYTES 32 +extern int crypto_auth_hmacsha512256_tweet(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_auth_hmacsha512256_tweet_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +#define crypto_auth_hmacsha512256_tweet_VERSION "-" +#define crypto_auth_hmacsha512256 crypto_auth_hmacsha512256_tweet +#define crypto_auth_hmacsha512256_verify crypto_auth_hmacsha512256_tweet_verify +#define crypto_auth_hmacsha512256_BYTES crypto_auth_hmacsha512256_tweet_BYTES +#define crypto_auth_hmacsha512256_KEYBYTES crypto_auth_hmacsha512256_tweet_KEYBYTES +#define crypto_auth_hmacsha512256_VERSION crypto_auth_hmacsha512256_tweet_VERSION +#define crypto_auth_hmacsha512256_IMPLEMENTATION "crypto_auth/hmacsha512256/tweet" +#define crypto_box_PRIMITIVE "curve25519xsalsa20poly1305" +#define crypto_box crypto_box_curve25519xsalsa20poly1305 +#define crypto_box_open crypto_box_curve25519xsalsa20poly1305_open +#define crypto_box_keypair crypto_box_curve25519xsalsa20poly1305_keypair +#define crypto_box_beforenm crypto_box_curve25519xsalsa20poly1305_beforenm +#define crypto_box_afternm 
crypto_box_curve25519xsalsa20poly1305_afternm +#define crypto_box_open_afternm crypto_box_curve25519xsalsa20poly1305_open_afternm +#define crypto_box_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES +#define crypto_box_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES +#define crypto_box_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES +#define crypto_box_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_NONCEBYTES +#define crypto_box_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ZEROBYTES +#define crypto_box_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES +#define crypto_box_IMPLEMENTATION crypto_box_curve25519xsalsa20poly1305_IMPLEMENTATION +#define crypto_box_VERSION crypto_box_curve25519xsalsa20poly1305_VERSION +#define crypto_box_curve25519xsalsa20poly1305_tweet_PUBLICKEYBYTES 32 +#define crypto_box_curve25519xsalsa20poly1305_tweet_SECRETKEYBYTES 32 +#define crypto_box_curve25519xsalsa20poly1305_tweet_BEFORENMBYTES 32 +#define crypto_box_curve25519xsalsa20poly1305_tweet_NONCEBYTES 24 +#define crypto_box_curve25519xsalsa20poly1305_tweet_ZEROBYTES 32 +#define crypto_box_curve25519xsalsa20poly1305_tweet_BOXZEROBYTES 16 +extern int crypto_box_curve25519xsalsa20poly1305_tweet(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_curve25519xsalsa20poly1305_tweet_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_curve25519xsalsa20poly1305_tweet_keypair(unsigned char *,unsigned char *); +extern int crypto_box_curve25519xsalsa20poly1305_tweet_beforenm(unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_box_curve25519xsalsa20poly1305_tweet_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_box_curve25519xsalsa20poly1305_tweet_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +#define crypto_box_curve25519xsalsa20poly1305_tweet_VERSION "-" +#define crypto_box_curve25519xsalsa20poly1305 crypto_box_curve25519xsalsa20poly1305_tweet +#define crypto_box_curve25519xsalsa20poly1305_open crypto_box_curve25519xsalsa20poly1305_tweet_open +#define crypto_box_curve25519xsalsa20poly1305_keypair crypto_box_curve25519xsalsa20poly1305_tweet_keypair +#define crypto_box_curve25519xsalsa20poly1305_beforenm crypto_box_curve25519xsalsa20poly1305_tweet_beforenm +#define crypto_box_curve25519xsalsa20poly1305_afternm crypto_box_curve25519xsalsa20poly1305_tweet_afternm +#define crypto_box_curve25519xsalsa20poly1305_open_afternm crypto_box_curve25519xsalsa20poly1305_tweet_open_afternm +#define crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_tweet_PUBLICKEYBYTES +#define crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_tweet_SECRETKEYBYTES +#define crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_tweet_BEFORENMBYTES +#define crypto_box_curve25519xsalsa20poly1305_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_tweet_NONCEBYTES +#define crypto_box_curve25519xsalsa20poly1305_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_tweet_ZEROBYTES +#define crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_tweet_BOXZEROBYTES +#define 
crypto_box_curve25519xsalsa20poly1305_VERSION crypto_box_curve25519xsalsa20poly1305_tweet_VERSION +#define crypto_box_curve25519xsalsa20poly1305_IMPLEMENTATION "crypto_box/curve25519xsalsa20poly1305/tweet" +#define crypto_core_PRIMITIVE "salsa20" +#define crypto_core crypto_core_salsa20 +#define crypto_core_OUTPUTBYTES crypto_core_salsa20_OUTPUTBYTES +#define crypto_core_INPUTBYTES crypto_core_salsa20_INPUTBYTES +#define crypto_core_KEYBYTES crypto_core_salsa20_KEYBYTES +#define crypto_core_CONSTBYTES crypto_core_salsa20_CONSTBYTES +#define crypto_core_IMPLEMENTATION crypto_core_salsa20_IMPLEMENTATION +#define crypto_core_VERSION crypto_core_salsa20_VERSION +#define crypto_core_salsa20_tweet_OUTPUTBYTES 64 +#define crypto_core_salsa20_tweet_INPUTBYTES 16 +#define crypto_core_salsa20_tweet_KEYBYTES 32 +#define crypto_core_salsa20_tweet_CONSTBYTES 16 +extern int crypto_core_salsa20_tweet(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *); +#define crypto_core_salsa20_tweet_VERSION "-" +#define crypto_core_salsa20 crypto_core_salsa20_tweet +#define crypto_core_salsa20_OUTPUTBYTES crypto_core_salsa20_tweet_OUTPUTBYTES +#define crypto_core_salsa20_INPUTBYTES crypto_core_salsa20_tweet_INPUTBYTES +#define crypto_core_salsa20_KEYBYTES crypto_core_salsa20_tweet_KEYBYTES +#define crypto_core_salsa20_CONSTBYTES crypto_core_salsa20_tweet_CONSTBYTES +#define crypto_core_salsa20_VERSION crypto_core_salsa20_tweet_VERSION +#define crypto_core_salsa20_IMPLEMENTATION "crypto_core/salsa20/tweet" +#define crypto_core_hsalsa20_tweet_OUTPUTBYTES 32 +#define crypto_core_hsalsa20_tweet_INPUTBYTES 16 +#define crypto_core_hsalsa20_tweet_KEYBYTES 32 +#define crypto_core_hsalsa20_tweet_CONSTBYTES 16 +extern int crypto_core_hsalsa20_tweet(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *); +#define crypto_core_hsalsa20_tweet_VERSION "-" +#define crypto_core_hsalsa20 crypto_core_hsalsa20_tweet +#define crypto_core_hsalsa20_OUTPUTBYTES crypto_core_hsalsa20_tweet_OUTPUTBYTES +#define crypto_core_hsalsa20_INPUTBYTES crypto_core_hsalsa20_tweet_INPUTBYTES +#define crypto_core_hsalsa20_KEYBYTES crypto_core_hsalsa20_tweet_KEYBYTES +#define crypto_core_hsalsa20_CONSTBYTES crypto_core_hsalsa20_tweet_CONSTBYTES +#define crypto_core_hsalsa20_VERSION crypto_core_hsalsa20_tweet_VERSION +#define crypto_core_hsalsa20_IMPLEMENTATION "crypto_core/hsalsa20/tweet" +#define crypto_hashblocks_PRIMITIVE "sha512" +#define crypto_hashblocks crypto_hashblocks_sha512 +#define crypto_hashblocks_STATEBYTES crypto_hashblocks_sha512_STATEBYTES +#define crypto_hashblocks_BLOCKBYTES crypto_hashblocks_sha512_BLOCKBYTES +#define crypto_hashblocks_IMPLEMENTATION crypto_hashblocks_sha512_IMPLEMENTATION +#define crypto_hashblocks_VERSION crypto_hashblocks_sha512_VERSION +#define crypto_hashblocks_sha512_tweet_STATEBYTES 64 +#define crypto_hashblocks_sha512_tweet_BLOCKBYTES 128 +extern int crypto_hashblocks_sha512_tweet(unsigned char *,const unsigned char *,unsigned long long); +#define crypto_hashblocks_sha512_tweet_VERSION "-" +#define crypto_hashblocks_sha512 crypto_hashblocks_sha512_tweet +#define crypto_hashblocks_sha512_STATEBYTES crypto_hashblocks_sha512_tweet_STATEBYTES +#define crypto_hashblocks_sha512_BLOCKBYTES crypto_hashblocks_sha512_tweet_BLOCKBYTES +#define crypto_hashblocks_sha512_VERSION crypto_hashblocks_sha512_tweet_VERSION +#define crypto_hashblocks_sha512_IMPLEMENTATION "crypto_hashblocks/sha512/tweet" +#define crypto_hashblocks_sha256_tweet_STATEBYTES 32 +#define 
crypto_hashblocks_sha256_tweet_BLOCKBYTES 64 +extern int crypto_hashblocks_sha256_tweet(unsigned char *,const unsigned char *,unsigned long long); +#define crypto_hashblocks_sha256_tweet_VERSION "-" +#define crypto_hashblocks_sha256 crypto_hashblocks_sha256_tweet +#define crypto_hashblocks_sha256_STATEBYTES crypto_hashblocks_sha256_tweet_STATEBYTES +#define crypto_hashblocks_sha256_BLOCKBYTES crypto_hashblocks_sha256_tweet_BLOCKBYTES +#define crypto_hashblocks_sha256_VERSION crypto_hashblocks_sha256_tweet_VERSION +#define crypto_hashblocks_sha256_IMPLEMENTATION "crypto_hashblocks/sha256/tweet" +#define crypto_hash_PRIMITIVE "sha512" +#define crypto_hash crypto_hash_sha512 +#define crypto_hash_BYTES crypto_hash_sha512_BYTES +#define crypto_hash_IMPLEMENTATION crypto_hash_sha512_IMPLEMENTATION +#define crypto_hash_VERSION crypto_hash_sha512_VERSION +#define crypto_hash_sha512_tweet_BYTES 64 +extern int crypto_hash_sha512_tweet(unsigned char *,const unsigned char *,unsigned long long); +#define crypto_hash_sha512_tweet_VERSION "-" +#define crypto_hash_sha512 crypto_hash_sha512_tweet +#define crypto_hash_sha512_BYTES crypto_hash_sha512_tweet_BYTES +#define crypto_hash_sha512_VERSION crypto_hash_sha512_tweet_VERSION +#define crypto_hash_sha512_IMPLEMENTATION "crypto_hash/sha512/tweet" +#define crypto_hash_sha256_tweet_BYTES 32 +extern int crypto_hash_sha256_tweet(unsigned char *,const unsigned char *,unsigned long long); +#define crypto_hash_sha256_tweet_VERSION "-" +#define crypto_hash_sha256 crypto_hash_sha256_tweet +#define crypto_hash_sha256_BYTES crypto_hash_sha256_tweet_BYTES +#define crypto_hash_sha256_VERSION crypto_hash_sha256_tweet_VERSION +#define crypto_hash_sha256_IMPLEMENTATION "crypto_hash/sha256/tweet" +#define crypto_onetimeauth_PRIMITIVE "poly1305" +#define crypto_onetimeauth crypto_onetimeauth_poly1305 +#define crypto_onetimeauth_verify crypto_onetimeauth_poly1305_verify +#define crypto_onetimeauth_BYTES crypto_onetimeauth_poly1305_BYTES +#define crypto_onetimeauth_KEYBYTES crypto_onetimeauth_poly1305_KEYBYTES +#define crypto_onetimeauth_IMPLEMENTATION crypto_onetimeauth_poly1305_IMPLEMENTATION +#define crypto_onetimeauth_VERSION crypto_onetimeauth_poly1305_VERSION +#define crypto_onetimeauth_poly1305_tweet_BYTES 16 +#define crypto_onetimeauth_poly1305_tweet_KEYBYTES 32 +extern int crypto_onetimeauth_poly1305_tweet(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_onetimeauth_poly1305_tweet_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *); +#define crypto_onetimeauth_poly1305_tweet_VERSION "-" +#define crypto_onetimeauth_poly1305 crypto_onetimeauth_poly1305_tweet +#define crypto_onetimeauth_poly1305_verify crypto_onetimeauth_poly1305_tweet_verify +#define crypto_onetimeauth_poly1305_BYTES crypto_onetimeauth_poly1305_tweet_BYTES +#define crypto_onetimeauth_poly1305_KEYBYTES crypto_onetimeauth_poly1305_tweet_KEYBYTES +#define crypto_onetimeauth_poly1305_VERSION crypto_onetimeauth_poly1305_tweet_VERSION +#define crypto_onetimeauth_poly1305_IMPLEMENTATION "crypto_onetimeauth/poly1305/tweet" +#define crypto_scalarmult_PRIMITIVE "curve25519" +#define crypto_scalarmult crypto_scalarmult_curve25519 +#define crypto_scalarmult_base crypto_scalarmult_curve25519_base +#define crypto_scalarmult_BYTES crypto_scalarmult_curve25519_BYTES +#define crypto_scalarmult_SCALARBYTES crypto_scalarmult_curve25519_SCALARBYTES +#define crypto_scalarmult_IMPLEMENTATION 
crypto_scalarmult_curve25519_IMPLEMENTATION +#define crypto_scalarmult_VERSION crypto_scalarmult_curve25519_VERSION +#define crypto_scalarmult_curve25519_tweet_BYTES 32 +#define crypto_scalarmult_curve25519_tweet_SCALARBYTES 32 +extern int crypto_scalarmult_curve25519_tweet(unsigned char *,const unsigned char *,const unsigned char *); +extern int crypto_scalarmult_curve25519_tweet_base(unsigned char *,const unsigned char *); +#define crypto_scalarmult_curve25519_tweet_VERSION "-" +#define crypto_scalarmult_curve25519 crypto_scalarmult_curve25519_tweet +#define crypto_scalarmult_curve25519_base crypto_scalarmult_curve25519_tweet_base +#define crypto_scalarmult_curve25519_BYTES crypto_scalarmult_curve25519_tweet_BYTES +#define crypto_scalarmult_curve25519_SCALARBYTES crypto_scalarmult_curve25519_tweet_SCALARBYTES +#define crypto_scalarmult_curve25519_VERSION crypto_scalarmult_curve25519_tweet_VERSION +#define crypto_scalarmult_curve25519_IMPLEMENTATION "crypto_scalarmult/curve25519/tweet" +#define crypto_secretbox_PRIMITIVE "xsalsa20poly1305" +#define crypto_secretbox crypto_secretbox_xsalsa20poly1305 +#define crypto_secretbox_open crypto_secretbox_xsalsa20poly1305_open +#define crypto_secretbox_KEYBYTES crypto_secretbox_xsalsa20poly1305_KEYBYTES +#define crypto_secretbox_NONCEBYTES crypto_secretbox_xsalsa20poly1305_NONCEBYTES +#define crypto_secretbox_ZEROBYTES crypto_secretbox_xsalsa20poly1305_ZEROBYTES +#define crypto_secretbox_BOXZEROBYTES crypto_secretbox_xsalsa20poly1305_BOXZEROBYTES +#define crypto_secretbox_IMPLEMENTATION crypto_secretbox_xsalsa20poly1305_IMPLEMENTATION +#define crypto_secretbox_VERSION crypto_secretbox_xsalsa20poly1305_VERSION +#define crypto_secretbox_xsalsa20poly1305_tweet_KEYBYTES 32 +#define crypto_secretbox_xsalsa20poly1305_tweet_NONCEBYTES 24 +#define crypto_secretbox_xsalsa20poly1305_tweet_ZEROBYTES 32 +#define crypto_secretbox_xsalsa20poly1305_tweet_BOXZEROBYTES 16 +extern int crypto_secretbox_xsalsa20poly1305_tweet(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_secretbox_xsalsa20poly1305_tweet_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +#define crypto_secretbox_xsalsa20poly1305_tweet_VERSION "-" +#define crypto_secretbox_xsalsa20poly1305 crypto_secretbox_xsalsa20poly1305_tweet +#define crypto_secretbox_xsalsa20poly1305_open crypto_secretbox_xsalsa20poly1305_tweet_open +#define crypto_secretbox_xsalsa20poly1305_KEYBYTES crypto_secretbox_xsalsa20poly1305_tweet_KEYBYTES +#define crypto_secretbox_xsalsa20poly1305_NONCEBYTES crypto_secretbox_xsalsa20poly1305_tweet_NONCEBYTES +#define crypto_secretbox_xsalsa20poly1305_ZEROBYTES crypto_secretbox_xsalsa20poly1305_tweet_ZEROBYTES +#define crypto_secretbox_xsalsa20poly1305_BOXZEROBYTES crypto_secretbox_xsalsa20poly1305_tweet_BOXZEROBYTES +#define crypto_secretbox_xsalsa20poly1305_VERSION crypto_secretbox_xsalsa20poly1305_tweet_VERSION +#define crypto_secretbox_xsalsa20poly1305_IMPLEMENTATION "crypto_secretbox/xsalsa20poly1305/tweet" +#define crypto_sign_PRIMITIVE "ed25519" +#define crypto_sign crypto_sign_ed25519 +#define crypto_sign_open crypto_sign_ed25519_open +#define crypto_sign_keypair crypto_sign_ed25519_keypair +#define crypto_sign_BYTES crypto_sign_ed25519_BYTES +#define crypto_sign_PUBLICKEYBYTES crypto_sign_ed25519_PUBLICKEYBYTES +#define crypto_sign_SECRETKEYBYTES crypto_sign_ed25519_SECRETKEYBYTES +#define crypto_sign_IMPLEMENTATION crypto_sign_ed25519_IMPLEMENTATION 
+#define crypto_sign_VERSION crypto_sign_ed25519_VERSION +#define crypto_sign_ed25519_tweet_BYTES 64 +#define crypto_sign_ed25519_tweet_PUBLICKEYBYTES 32 +#define crypto_sign_ed25519_tweet_SECRETKEYBYTES 64 +extern int crypto_sign_ed25519_tweet(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_sign_ed25519_tweet_open(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *); +extern int crypto_sign_ed25519_tweet_keypair(unsigned char *,unsigned char *); +#define crypto_sign_ed25519_tweet_VERSION "-" +#define crypto_sign_ed25519 crypto_sign_ed25519_tweet +#define crypto_sign_ed25519_open crypto_sign_ed25519_tweet_open +#define crypto_sign_ed25519_keypair crypto_sign_ed25519_tweet_keypair +#define crypto_sign_ed25519_BYTES crypto_sign_ed25519_tweet_BYTES +#define crypto_sign_ed25519_PUBLICKEYBYTES crypto_sign_ed25519_tweet_PUBLICKEYBYTES +#define crypto_sign_ed25519_SECRETKEYBYTES crypto_sign_ed25519_tweet_SECRETKEYBYTES +#define crypto_sign_ed25519_VERSION crypto_sign_ed25519_tweet_VERSION +#define crypto_sign_ed25519_IMPLEMENTATION "crypto_sign/ed25519/tweet" +#define crypto_stream_PRIMITIVE "xsalsa20" +#define crypto_stream crypto_stream_xsalsa20 +#define crypto_stream_xor crypto_stream_xsalsa20_xor +#define crypto_stream_KEYBYTES crypto_stream_xsalsa20_KEYBYTES +#define crypto_stream_NONCEBYTES crypto_stream_xsalsa20_NONCEBYTES +#define crypto_stream_IMPLEMENTATION crypto_stream_xsalsa20_IMPLEMENTATION +#define crypto_stream_VERSION crypto_stream_xsalsa20_VERSION +#define crypto_stream_xsalsa20_tweet_KEYBYTES 32 +#define crypto_stream_xsalsa20_tweet_NONCEBYTES 24 +extern int crypto_stream_xsalsa20_tweet(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_xsalsa20_tweet_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +#define crypto_stream_xsalsa20_tweet_VERSION "-" +#define crypto_stream_xsalsa20 crypto_stream_xsalsa20_tweet +#define crypto_stream_xsalsa20_xor crypto_stream_xsalsa20_tweet_xor +#define crypto_stream_xsalsa20_KEYBYTES crypto_stream_xsalsa20_tweet_KEYBYTES +#define crypto_stream_xsalsa20_NONCEBYTES crypto_stream_xsalsa20_tweet_NONCEBYTES +#define crypto_stream_xsalsa20_VERSION crypto_stream_xsalsa20_tweet_VERSION +#define crypto_stream_xsalsa20_IMPLEMENTATION "crypto_stream/xsalsa20/tweet" +#define crypto_stream_salsa20_tweet_KEYBYTES 32 +#define crypto_stream_salsa20_tweet_NONCEBYTES 8 +extern int crypto_stream_salsa20_tweet(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +extern int crypto_stream_salsa20_tweet_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *); +#define crypto_stream_salsa20_tweet_VERSION "-" +#define crypto_stream_salsa20 crypto_stream_salsa20_tweet +#define crypto_stream_salsa20_xor crypto_stream_salsa20_tweet_xor +#define crypto_stream_salsa20_KEYBYTES crypto_stream_salsa20_tweet_KEYBYTES +#define crypto_stream_salsa20_NONCEBYTES crypto_stream_salsa20_tweet_NONCEBYTES +#define crypto_stream_salsa20_VERSION crypto_stream_salsa20_tweet_VERSION +#define crypto_stream_salsa20_IMPLEMENTATION "crypto_stream/salsa20/tweet" +#define crypto_verify_PRIMITIVE "16" +#define crypto_verify crypto_verify_16 +#define crypto_verify_BYTES crypto_verify_16_BYTES +#define crypto_verify_IMPLEMENTATION crypto_verify_16_IMPLEMENTATION +#define crypto_verify_VERSION 
crypto_verify_16_VERSION +#define crypto_verify_16_tweet_BYTES 16 +extern int crypto_verify_16_tweet(const unsigned char *,const unsigned char *); +#define crypto_verify_16_tweet_VERSION "-" +#define crypto_verify_16 crypto_verify_16_tweet +#define crypto_verify_16_BYTES crypto_verify_16_tweet_BYTES +#define crypto_verify_16_VERSION crypto_verify_16_tweet_VERSION +#define crypto_verify_16_IMPLEMENTATION "crypto_verify/16/tweet" +#define crypto_verify_32_tweet_BYTES 32 +extern int crypto_verify_32_tweet(const unsigned char *,const unsigned char *); +#define crypto_verify_32_tweet_VERSION "-" +#define crypto_verify_32 crypto_verify_32_tweet +#define crypto_verify_32_BYTES crypto_verify_32_tweet_BYTES +#define crypto_verify_32_VERSION crypto_verify_32_tweet_VERSION +#define crypto_verify_32_IMPLEMENTATION "crypto_verify/32/tweet" +#endif diff --git a/src/Amalgam/Amalgam.h b/src/Amalgam/Amalgam.h new file mode 100644 index 00000000..a2cab244 --- /dev/null +++ b/src/Amalgam/Amalgam.h @@ -0,0 +1,76 @@ +#pragma once + +//system headers: +#include + +#if defined(_MSC_VER) +//Microsoft +#define AMALGAM_EXPORT __declspec(dllexport) +#elif defined(__GNUC__) +//GCC +#define AMALGAM_EXPORT __attribute__((visibility("default"))) +#else +#define AMALGAM_EXPORT +#endif + +extern "C" +{ + //loads the entity specified into handle + AMALGAM_EXPORT bool LoadEntity(char *handle, char *path, bool persistent, bool load_contained_entities, char *write_log_filename, char *print_log_filename); + + //stores the entity specified by handle into path + AMALGAM_EXPORT void StoreEntity(char *handle, char *path, bool update_persistence_location = false, bool store_contained_entities = true); + + //executes label on handle + AMALGAM_EXPORT void ExecuteEntity(char *handle, char *label); + + //deletes the entity specified by handle + AMALGAM_EXPORT void DeleteEntity(char *handle); + + //sets the random seed for the entity specified by handle + AMALGAM_EXPORT bool SetRandomSeed(char *handle, char *rand_seed); + + //sets num_entities to the number of entities and allocates an array of string pointers for the handles loaded + AMALGAM_EXPORT char **GetEntities(uint64_t *num_entities); + + AMALGAM_EXPORT double GetNumberValue(char *handle, char *label); + AMALGAM_EXPORT void AppendNumberValue(char *handle, char *label, double value); + AMALGAM_EXPORT void SetNumberValue(char *handle, char *label, double value); + + AMALGAM_EXPORT size_t PrepStringValueToTransferBuffer(char *handle, char *label); + AMALGAM_EXPORT void AppendStringValue(char *handle, char *label, char *value); + AMALGAM_EXPORT void SetStringValue(char *handle, char *label, char *value); + + AMALGAM_EXPORT double *GetNumberListPtr(char *handle, char *label); + AMALGAM_EXPORT size_t GetNumberListLength(char *handle, char *label); + AMALGAM_EXPORT void GetNumberList(char *handle, char *label, double *out_list); + AMALGAM_EXPORT void AppendNumberList(char *handle, char *label, double *list, size_t len); + AMALGAM_EXPORT void SetNumberList(char *handle, char *label, double *list, size_t len); + + // IMPORTANT: GetStringList assumes that the char ** array is unallocated + // If there are allocated char *s inside, they will become inaccessible and be a memory leak.
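To make the caveat above concrete, here is a minimal caller sketch. It assumes the usage pattern the declarations that follow suggest (query the length, then fetch the array the library returns); the exact ownership and lifetime rules live in the implementation rather than in this header, and the helper name and handle/label parameters are purely illustrative.

#include <stdio.h>
#include "Amalgam.h"

// Hypothetical helper: print every string in the list stored at `label`.
void PrintStringList(char *handle, char *label)
{
	size_t len = GetStringListLength(handle, label);
	char **items = GetStringListPtr(handle, label); // array populated by the library
	for (size_t i = 0; i < len; ++i)
		printf("%s\n", items[i]);
	// Per the note above, the array is assumed to start out unpopulated; any strings
	// already allocated in a reused array would become unreachable and leak.
}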
+ AMALGAM_EXPORT size_t GetStringListLength(char *handle, char *label); + AMALGAM_EXPORT wchar_t **GetStringListPtrWide(char *handle, char *label); + AMALGAM_EXPORT char **GetStringListPtr(char *handle, char *label); + AMALGAM_EXPORT void AppendStringList(char *handle, char *label, char **list, size_t len); + AMALGAM_EXPORT void SetStringList(char *handle, char *label, char **list, size_t len); + + AMALGAM_EXPORT void SetJSONToLabel(char *handle, char *label, char *json); + + AMALGAM_EXPORT wchar_t *GetJSONPtrFromLabelWide(char *handle, char *label); + AMALGAM_EXPORT char *GetJSONPtrFromLabel(char *handle, char *label); + + AMALGAM_EXPORT wchar_t *ExecuteEntityJsonPtrWide(char *handle, char *label, char *json); + AMALGAM_EXPORT char *ExecuteEntityJsonPtr(char *handle, char *label, char *json); + + AMALGAM_EXPORT wchar_t *GetVersionStringWide(); + AMALGAM_EXPORT char *GetVersionString(); + + AMALGAM_EXPORT wchar_t* GetConcurrencyTypeStringWide(); + AMALGAM_EXPORT char* GetConcurrencyTypeString(); + + AMALGAM_EXPORT void SetSBFDataStoreEnabled(bool enable_SBF_datastore); + AMALGAM_EXPORT bool IsSBFDataStoreEnabled(); + AMALGAM_EXPORT size_t GetMaxNumThreads(); + AMALGAM_EXPORT void SetMaxNumThreads(size_t max_num_threads); +} diff --git a/src/Amalgam/Amalgam.vcxproj b/src/Amalgam/Amalgam.vcxproj new file mode 100644 index 00000000..2399f42d --- /dev/null +++ b/src/Amalgam/Amalgam.vcxproj @@ -0,0 +1,666 @@ + + + + + ST_Debug_EXE + x64 + + + MT_Debug_DLL + x64 + + + MT_Debug_EXE + x64 + + + MT_Release_DLL + x64 + + + MT_Release_EXE + x64 + + + OMP_Release_DLL + x64 + + + OMP_Release_EXE + x64 + + + ST_Release_EXE + x64 + + + ST_Debug_DLL + x64 + + + ST_Release_DLL + x64 + + + + {640515C4-B87F-4210-A603-F091804FAE5A} + Amalgam + 10.0 + + + + Application + true + Unicode + v143 + + + Application + true + Unicode + v143 + + + Application + false + true + Unicode + v143 + + + Application + false + true + Unicode + v143 + + + Application + false + true + Unicode + v143 + + + DynamicLibrary + true + Unicode + v143 + + + DynamicLibrary + true + Unicode + v143 + + + DynamicLibrary + false + true + Unicode + v143 + + + DynamicLibrary + false + true + Unicode + v143 + + + DynamicLibrary + false + true + Unicode + v143 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + false + AllRules.ruleset + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + ClCompile + 
$(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + $(SolutionDir)$(Platform)\$(Configuration)\ + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + $(SolutionDir)$(Platform)\$(Configuration)\ + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + $(SolutionDir)$(Platform)\$(Configuration)\ + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + $(SolutionDir)$(Platform)\$(Configuration)\ + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + false + AllRules.ruleset + $(SolutionDir)$(Platform)\$(Configuration)\ + ClCompile + $(SolutionDir)/src/3rd_party;$(ProjectDir);$(ProjectDir)/entity;$(ProjectDir)/evaluablenode;$(ProjectDir)/importexport;$(ProjectDir)/interpreter;$(ProjectDir)/rand;$(ProjectDir)/string;$(VC_IncludePath);$(WindowsSDK_IncludePath) + + + + Level3 + Disabled + false + %(PreprocessorDefinitions); + MultiThreadedDebug + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Disabled + false + MULTITHREAD_SUPPORT;%(PreprocessorDefinitions); + MultiThreadedDebug + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + %(PreprocessorDefinitions); + AssemblyAndSourceCode + AdvancedVectorExtensions2 + stdcpp17 + 
26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + %(PreprocessorDefinitions); + AssemblyAndSourceCode + AdvancedVectorExtensions2 + stdcpp17 + true + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + MULTITHREAD_SUPPORT;%(PreprocessorDefinitions); + AssemblyAndSourceCode + AdvancedVectorExtensions2 + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Disabled + false + %(PreprocessorDefinitions); + MultiThreadedDebug + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Disabled + false + MULTITHREAD_SUPPORT;%(PreprocessorDefinitions); + + + MultiThreadedDebug + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File 
"$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + %(PreprocessorDefinitions); + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + %(PreprocessorDefinitions); + stdcpp17 + true + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + Level3 + Full + true + true + false + Speed + true + true + AnySuitable + MultiThreaded + MULTITHREAD_SUPPORT;%(PreprocessorDefinitions); + stdcpp17 + 26496;26446;26490;26467;4530;26481;26440;26439;26447;26432;26115;26110;26135;26482;26429;26455;26461;26426;26409;26460;26400;26472;26403;26476;26495;26433;26401;26497;26819;26462;26457 + Sync + AdvancedVectorExtensions2 + true + + + true + true + true + true + kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + 67108864 + Console + + + $(ProjectDir)AmalgamVersion.h + powershell -nologo -File "$(SolutionDir)build/powershell/Create-Amalgam-Version-Header.ps1" + Creating Version header + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/Amalgam/Amalgam.vcxproj.filters b/src/Amalgam/Amalgam.vcxproj.filters new file mode 100644 index 00000000..c8a8942c --- /dev/null +++ b/src/Amalgam/Amalgam.vcxproj.filters @@ -0,0 +1,361 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + 
Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Resource Files + + + + + Resource Files + + + + + + \ No newline at end of file diff --git a/src/Amalgam/AmalgamAPI.cpp b/src/Amalgam/AmalgamAPI.cpp new file mode 100644 index 00000000..717e77eb --- /dev/null +++ b/src/Amalgam/AmalgamAPI.cpp @@ -0,0 +1,437 @@ +//project headers: +#include "Amalgam.h" +#include "AmalgamVersion.h" +#include "Concurrency.h" +#include "EntityExternalInterface.h" +#include "EntityQueries.h" + +//system headers: +#include +#include +#include +#include +#include +#include + +//Workaround because GCC doesn't support strcpy_s +// TODO 15993: Reevaluate when moving to C++20 +#if defined(__GNUC__) +#define strcpy_s(dest, size, source) {strncpy( (dest), (source), (size)); (dest)[(size) - 1] = '\0'; } +#endif + +EntityExternalInterface entint; + +//binary's concurrency build type +std::string ConcurrencyType() +{ + return +#if defined(MULTITHREAD_SUPPORT) + "MultiThreaded" +#elif defined(_OPENMP) + "OpenMP" +#elif defined(MULTITHREAD_SUPPORT) && defined(_OPENMP) + "MultiThreaded+OpenMP" +#else + "SingleThreaded" +#endif + ; +} + +extern "C" +{ + // ************************************ + // helper functions (not in API) + // ************************************ + + char* StringToCharPtr(std::string& value) + { + char* out = new char[value.length() + 1]; + strcpy_s(out, value.length() + 1, value.c_str()); + return out; + } + + wchar_t* StringToWCharPtr(std::string& value) + { + std::wstring widestr = std::wstring(value.begin(), value.end()); + widestr += (wchar_t)0; + wchar_t* wct = new wchar_t[widestr.length()]; + + //The below call is depricated but medium risk since the buffer is generated within the function + //and length of the string is tracked. 
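	//Note on ConcurrencyType() above: the "#elif defined(MULTITHREAD_SUPPORT) && defined(_OPENMP)" branch is
	// unreachable, because the preceding "#if defined(MULTITHREAD_SUPPORT)" already matches whenever
	// MULTITHREAD_SUPPORT is defined.  A minimal sketch of an ordering that would make the combined label
	// reachable (assuming a build configuration can define both macros at once) tests the combined case first:
	//
	//	#if defined(MULTITHREAD_SUPPORT) && defined(_OPENMP)
	//		"MultiThreaded+OpenMP"
	//	#elif defined(MULTITHREAD_SUPPORT)
	//		"MultiThreaded"
	//	#elif defined(_OPENMP)
	//		"OpenMP"
	//	#else
	//		"SingleThreaded"
	//	#endif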
This still could pose a vulnerability with malicious unicode + //however and an alternative that returns with minimal amount of allocations that secure should + //be explored. wcsncpy_s was explored as an option but is not guaranteed to exist in the STL for + //linux. +#ifdef _MSC_VER +#pragma warning( push ) +#pragma warning( disable: 4996 ) +#endif + wcsncpy(wct, widestr.c_str(), widestr.length()); +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + return wct; + } + + // ************************************ + // api methods + // ************************************ + + bool LoadEntity(char *handle, char *path, bool persistent, bool load_contained_entities, char *write_log_filename, char *print_log_filename) + { + std::string h(handle); + std::string p(path); + std::string wlfname(write_log_filename); + std::string plfname(print_log_filename); + + return entint.LoadEntity(h, p, persistent, load_contained_entities, wlfname, plfname); + } + + void StoreEntity(char *handle, char *path, bool update_persistence_location, bool store_contained_entities) + { + std::string h(handle); + std::string p(path); + + entint.StoreEntity(h, p, update_persistence_location, store_contained_entities); + } + + void SetJSONToLabel(char *handle, char *label, char *json) + { + std::string h(handle); + std::string l(label); + std::string_view j(json); + + entint.SetJSONToLabel(h, l, j); + } + + wchar_t *GetJSONPtrFromLabelWide(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + std::string ret = entint.GetJSONFromLabel(h, l); + return StringToWCharPtr(ret); + } + + char *GetJSONPtrFromLabel(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + std::string ret = entint.GetJSONFromLabel(h, l); + return StringToCharPtr(ret); + } + + wchar_t *GetVersionStringWide() + { + std::string version(AMALGAM_VERSION_STRING); + return StringToWCharPtr(version); + } + + char *GetVersionString() + { + std::string version(AMALGAM_VERSION_STRING); + return StringToCharPtr(version); + } + + wchar_t* GetConcurrencyTypeStringWide() + { + std::string ct = ConcurrencyType(); + return StringToWCharPtr(ct); + } + + char* GetConcurrencyTypeString() + { + std::string ct = ConcurrencyType(); + return StringToCharPtr(ct); + } + + wchar_t *ExecuteEntityJsonPtrWide(char *handle, char *label, char *json) + { + std::string h(handle); + std::string l(label); + std::string_view j(json); + std::string ret = entint.ExecuteEntityJSON(h, l, j); + return StringToWCharPtr(ret); + } + + char *ExecuteEntityJsonPtr(char *handle, char *label, char *json) + { + std::string h(handle); + std::string l(label); + std::string_view j(json); + std::string ret = entint.ExecuteEntityJSON(h, l, j); + return StringToCharPtr(ret); + } + + void ExecuteEntity(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + entint.ExecuteEntity(h, l); + } + + void DeleteEntity(char *handle) + { + std::string h(handle); + entint.DeleteEntity(h); + } + + bool SetRandomSeed(char *handle, char *rand_seed) + { + std::string h(handle); + std::string s(rand_seed); + return entint.SetRandomSeed(h, s); + } + + char **GetEntities(uint64_t *num_entities) + { + std::vector entities = entint.GetEntities(); + *num_entities = entities.size(); + char **return_entities = new char *[entities.size()]; + for(size_t i = 0; i < entities.size(); i++) + { + auto &handle = entities[i]; + char *new_string = new char[handle.size() + 1]; + for(size_t j = 0; j < handle.size(); j++) + new_string[j] = handle[j]; + 
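				//Ownership note: the char*, wchar_t*, and char** values returned by this API (StringToCharPtr,
				// StringToWCharPtr, GetVersionString, GetEntities, GetStringListPtr, GetNumberListPtr, ...) are
				// allocated with new[], so the caller is responsible for releasing them.  The portion of the API
				// shown here does not include a matching release call, so a caller sharing the library's allocator
				// would free a handle list roughly like this (illustrative sketch only):
				//
				//	uint64_t count = 0;
				//	char **handles = GetEntities(&count);
				//	for(uint64_t k = 0; k < count; k++)
				//		delete[] handles[k];
				//	delete[] handles;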
new_string[handle.size()] = '\0'; + + return_entities[i] = new_string; + } + + return return_entities; + } + + // ************************************ + // get, set, and append numbers + // ************************************ + + double GetNumberValue(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + return entint.GetNumber(h, l); + } + + void AppendNumberValue(char *handle, char *label, double value) + { + std::string h(handle); + std::string l(label); + + entint.AppendToLabel(h, l, value); + } + + void SetNumberValue(char *handle, char *label, double value) + { + std::string h(handle); + std::string l(label); + + entint.SetLabel(h, l, value); + } + + void AppendStringValue(char *handle, char *label, char *value) + { + std::string h(handle); + std::string l(label); + std::string v(value); + + entint.AppendToLabel(h, l, v); + } + + void SetStringValue(char *handle, char *label, char *value) + { + std::string h(handle); + std::string l(label); + std::string v(value); + + entint.SetLabel(h, l, v); + } + + void AppendNumberList(char *handle, char *label, double *list, size_t len) + { + std::string h(handle); + std::string l(label); + + for(size_t i = 0; i < len; i++) + { + entint.AppendToLabel(h, l, list[i]); + } + } + + void SetNumberList(char *handle, char *label, double *list, size_t len) + { + std::string h(handle); + std::string l(label); + + entint.SetNumberList(h, l, list, len); + } + + void SetNumberMatrix(char *handle, char *label, double *list, size_t rows, size_t columns) + { + std::string h(handle); + std::string l(label); + + entint.SetNumberMatrix(h, l, list, rows, columns); + } + + size_t GetNumberListLength(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + return entint.GetNumberListLength(h, l); + } + + size_t GetNumberMatrixWidth(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + return entint.GetNumberMatrixWidth(h, l); + } + + size_t GetNumberMatrixHeight(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + return entint.GetNumberMatrixHeight(h, l); + } + + double *GetNumberListPtr(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + size_t len = GetNumberListLength(handle, label); + + double *ret = new double[len]; + + entint.GetNumberList(h, l, ret, len); + return ret; + } + + double *GetNumberMatrixPtr(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + size_t width = GetNumberMatrixWidth(handle, label); + size_t height = GetNumberMatrixHeight(handle, label); + + double *ret = new double[width*height]; + + entint.GetNumberMatrix(h, l, ret, width, height); + return ret; + } + + void GetNumberList(char *handle, char *label, double *out_list) + { + std::string h(handle); + std::string l(label); + + size_t len = GetNumberListLength(handle, label); + + entint.GetNumberList(h, l, out_list, len); + } + + void AppendStringList(char *handle, char *label, char **list, size_t len) + { + std::string h(handle); + std::string l(label); + + for(size_t i = 0; i < len; i++) + { + std::string to_append(list[i]); + entint.AppendToLabel(h, l, to_append); + } + } + + void SetStringList(char *handle, char *label, char **list, size_t len) + { + std::string h(handle); + std::string l(label); + + entint.SetStringList(h, l, list, len); + } + + size_t GetStringListLength(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + return entint.GetStringListLength(h, l); + } + + // 
IMPORTANT: GetStringList assumes that the char ** array is unallocated + // If there are allocated char *s inside, they will become inacessable and be a memory leak. + wchar_t **GetStringListPtrWide(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + size_t len = GetStringListLength(handle, label); + + std::string *str_list = new std::string[len]; + + entint.GetStringList(h, l, str_list, len); + + wchar_t **wct = new wchar_t *[len]; + for(size_t i = 0; i < len; i++) + { + wct[i] = StringToWCharPtr(str_list[i]); + } + + return wct; + } + + // IMPORTANT: GetStringList assumes that the char ** array is unallocated + // If there are allocated char *s inside, they will become inacessable and be a memory leak. + char **GetStringListPtr(char *handle, char *label) + { + std::string h(handle); + std::string l(label); + + size_t len = GetStringListLength(handle, label); + + std::string *str_list = new std::string[len]; + + entint.GetStringList(h, l, str_list, len); + + char **ct = new char *[len]; + for(size_t i = 0; i < len; i++) + ct[i] = StringToCharPtr(str_list[i]); + + return ct; + } + + // ************************************ + // Amalgam Engine Flags + // ************************************ + + void SetSBFDataStoreEnabled(bool enable_SBF_datastore) + { + _enable_SBF_datastore = enable_SBF_datastore; + } + + bool IsSBFDataStoreEnabled() + { + return _enable_SBF_datastore; + } + + size_t GetMaxNumThreads() + { + #if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + return Concurrency::GetMaxNumThreads(); + #else + return 1; + #endif + } + + void SetMaxNumThreads(size_t max_num_threads) + { + #if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + Concurrency::SetMaxNumThreads(max_num_threads); + #endif + } +} diff --git a/src/Amalgam/AmalgamMain.cpp b/src/Amalgam/AmalgamMain.cpp new file mode 100644 index 00000000..dd58df02 --- /dev/null +++ b/src/Amalgam/AmalgamMain.cpp @@ -0,0 +1,326 @@ +//project headers: +#include "Amalgam.h" +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "Concurrency.h" +#include "Entity.h" +#include "EntityExternalInterface.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "EvaluableNode.h" +#include "EvaluableNodeTreeFunctions.h" +#include "Parser.h" +#include "PerformanceProfiler.h" +#include "PlatformSpecific.h" + +//system headers: +#include +#include +#include +#include + +int RunAmalgamTrace(std::istream *in_stream, std::ostream *out_stream, std::string &random_seed); + +void PrintProfilingInformationIfApplicable() +{ + if(performance_profiler.IsProfilingEnabled()) + { + size_t max_num_perf_counters_to_display = 20; + std::cout << "Operations that took the longest total time (s): " << std::endl; + auto longest_total_time = performance_profiler.GetNumCallsByTotalTime(); + for(size_t i = 0; i < max_num_perf_counters_to_display && i < longest_total_time.size(); i++) + std::cout << longest_total_time[i].first << ": " << longest_total_time[i].second << std::endl; + std::cout << std::endl; + + std::cout << "Operations called the most number of times: " << std::endl; + auto most_calls = performance_profiler.GetNumCallsByType(); + for(size_t i = 0; i < max_num_perf_counters_to_display && i < most_calls.size(); i++) + std::cout << most_calls[i].first << ": " << most_calls[i].second << std::endl; + std::cout << std::endl; + + std::cout << "Operations that took the longest average time (s): " << std::endl; + auto longest_ave_time = 
performance_profiler.GetNumCallsByAveTime(); + for(size_t i = 0; i < max_num_perf_counters_to_display && i < longest_ave_time.size(); i++) + std::cout << longest_ave_time[i].first << ": " << longest_ave_time[i].second << std::endl; + std::cout << std::endl; + + std::cout << "Operations that increased the memory usage the most in total (nodes): " << std::endl; + auto most_total_memory = performance_profiler.GetNumCallsByTotalMemoryIncrease(); + for(size_t i = 0; i < max_num_perf_counters_to_display && i < most_total_memory.size(); i++) + std::cout << most_total_memory[i].first << ": " << most_total_memory[i].second << std::endl; + std::cout << std::endl; + + std::cout << "Operations that increased the memory usage the most on average (nodes): " << std::endl; + auto most_ave_memory = performance_profiler.GetNumCallsByAveMemoryIncrease(); + for(size_t i = 0; i < max_num_perf_counters_to_display && i < most_ave_memory.size(); i++) + std::cout << most_ave_memory[i].first << ": " << most_ave_memory[i].second << std::endl; + std::cout << std::endl; + + std::cout << "Operations that decreased the memory usage the most in total (nodes): " << std::endl; + for(size_t i = 0; i < max_num_perf_counters_to_display && i < most_total_memory.size(); i++) + { + //only write out those that had a net decrease + double mem_delta = most_total_memory[most_total_memory.size() - 1 - i].second; + if(mem_delta >= 0) + break; + std::cout << most_total_memory[i].first << ": " << mem_delta << std::endl; + } + std::cout << std::endl; + + std::cout << "Operations that decreased the memory usage the most on average (nodes): " << std::endl; + for(size_t i = 0; i < max_num_perf_counters_to_display && i < most_ave_memory.size(); i++) + { + //only write out those that had a net decrease + double mem_delta = most_ave_memory[most_total_memory.size() - 1 - i].second; + if(mem_delta >= 0) + break; + std::cout << most_total_memory[i].first << ": " << mem_delta << std::endl; + } + std::cout << std::endl; + + std::cout << "Total number of operations: " << performance_profiler.GetTotalNumCalls() << std::endl; + + auto [total_mem_increase, positive_mem_increase] = performance_profiler.GetTotalAndPositiveMemoryIncreases(); + std::cout << "Net number of nodes allocated: " << total_mem_increase << std::endl; + std::cout << "Total node increases: " << positive_mem_increase << std::endl; + } +} + +PLATFORM_MAIN_CONSOLE +{ + PLATFORM_ARGS_CONSOLE; + + if(args.size() == 1) + { + std::cout + << "Concurrency type: " << GetConcurrencyTypeString() << std::endl + << "Must specify an input file. Flags:" << std::endl + << "-l [filename]: specify a debug log file." << std::endl + #if defined(INTERPRETER_PROFILE_OPCODES) || defined(INTERPRETER_PROFILE_LABELS_CALLED) + << "-p: display engine performance counters upon completion" << std::endl + #endif + << "-s [random number seed]: specify a particular random number seed -- can be any alphanumeric string." << std::endl + << "-t [filename]: specify a code-based transaction log file." << std::endl + #if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + << "--numthreads [number]: maximum number of threads to use (if unspecified or set to zero, may use unlimited)." << std::endl + #endif + << "--debug: when specified, begins in debugging mode." << std::endl + << "--debug-minimal: when specified, begins in debugging mode with minimal output while stepping." << std::endl + << "--debug-sources: when specified, prepends all node comments with the source of the node when applicable." 
<< std::endl + << "--nosbfds: disables the sbfds acceleration, which is generally preferred in the heuristics." << std::endl + << "--trace: uses commands via stdio to act as if it were being called as a library." << std::endl + << "--tracefile [file]: like trace, but pulls the data from the file specified." << std::endl + << "--version: prints the current version." << std::endl; + return 0; + } + + //run options + bool debug_state = false; + bool debug_minimal = false; + bool debug_sources = false; + bool run_trace = false; + bool run_tracefile = false; + std::string tracefile; + std::string amlg_file_to_run; + bool print_to_stdio = true; + std::string write_log_filename; + std::string print_log_filename; +#if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + size_t num_threads = 0; +#endif + + typedef std::chrono::steady_clock clk; + auto t = std::chrono::duration_cast(clk::now().time_since_epoch()).count(); + std::string random_seed = std::to_string(t); + if(Platform_IsDebuggerPresent()) + random_seed = "01234567890123456789012345"; + + //parameters to be passed into the code being run + std::string interpreter_path{args[0]}; + std::vector passthrough_params; + passthrough_params.emplace_back(""); //add placeholder for script name + + for(size_t i = 1; i < args.size(); i++) + { + if(args[i] == "-l" && i + 1 < args.size()) + { + print_log_filename = args[++i]; + } + #if defined(INTERPRETER_PROFILE_OPCODES) || defined(INTERPRETER_PROFILE_LABELS_CALLED) + else if(args[i] == "-p") + performance_profiler.EnableProfiling(); + #endif + else if(args[i] == "-q") + print_to_stdio = false; + else if(args[i] == "-s" && i + 1 < args.size()) + random_seed = args[++i]; + else if(args[i] == "-t" && i + 1 < args.size()) + write_log_filename = args[++i]; + #if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + else if(args[i] == "--numthreads") + num_threads = static_cast(std::max(std::atoi(args[++i].data()), 0)); + #endif + else if(args[i] == "--debug") + debug_state = true; + else if(args[i] == "--debug-minimal") + { + debug_state = true; + debug_minimal = true; + } + else if(args[i] == "--debug-sources") + debug_sources = true; + else if(args[i] == "--nosbfds") + _enable_SBF_datastore = false; + else if(args[i] == "--trace") + run_trace = true; + else if(args[i] == "--tracefile" && i + 1 < args.size()) + { + run_tracefile = true; + tracefile = args[++i]; + } + else if(args[i] == "--version") + std::cout << "Amalgam Version: " << AMALGAM_VERSION_STRING << std::endl; + else if(amlg_file_to_run == "") + { + //if relative path, prepend current working dir to make absolute path + //path is not converted to canonical path to preserve user's input + std::filesystem::path file(args[i]); + if(file.is_relative()) + file = std::filesystem::current_path() / file; + + amlg_file_to_run = file.string(); + passthrough_params[0] = amlg_file_to_run; + } + else //add on to passthrough params + passthrough_params.emplace_back(args[i]); + } + +#if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + Concurrency::SetMaxNumThreads(num_threads); +#endif + + if(debug_state) + Interpreter::SetDebuggingState(true); + + if(debug_sources) + asset_manager.debugSources = true; + + if(debug_minimal) + asset_manager.debugMinimal = true; + + if(run_trace) + { + return RunAmalgamTrace(&std::cin, &std::cout, random_seed); + } + else if(run_tracefile) + { + std::istream *trace_stream = new std::ifstream(tracefile); + int ret = RunAmalgamTrace(trace_stream, &std::cout, random_seed); + delete trace_stream; + + 
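		//For reference, a hypothetical invocation and how the argument loop above maps it (paths and values are
		// illustrative only):
		//
		//	amalgam -s myseed --numthreads 4 /home/user/script.amlg alpha beta
		//
		// yields random_seed == "myseed", num_threads == 4, and amlg_file_to_run == "/home/user/script.amlg"
		// (relative script paths are first made absolute against the current working directory).  The remaining
		// arguments become passthrough_params == { "/home/user/script.amlg", "alpha", "beta" }, which the loaded
		// entity later sees as its top-level "argv" list, with "interpreter" holding args[0], the path used to
		// launch amalgam.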
PrintProfilingInformationIfApplicable(); + return ret; + } + else + { + //run the standard amlg command line interface + std::string file_type = ""; + Entity *entity = asset_manager.LoadEntityFromResourcePath(amlg_file_to_run, file_type, false, true, false, true, random_seed); + if(entity == nullptr) + return 0; + + asset_manager.SetRootPermission(entity, true); + + ExecutionCycleCount num_steps_executed = 0; + size_t num_nodes_allocated = 0; + PrintListener *print_listener = nullptr; + std::vector write_listeners; + + if(Platform_IsDebuggerPresent()) + { + print_listener = new PrintListener("out.txt", print_to_stdio); + } + else if(print_log_filename != "" || print_to_stdio) + { + print_listener = new PrintListener(print_log_filename, print_to_stdio); + } + + if(write_log_filename != "") + { + EntityWriteListener *write_log = new EntityWriteListener(entity, false, write_log_filename); + write_listeners.push_back(write_log); + } + + //transform args into args variable + EvaluableNode *call_stack = entity->evaluableNodeManager.AllocNode(ENT_LIST); + EvaluableNode *args_node = entity->evaluableNodeManager.AllocNode(ENT_ASSOC); + call_stack->AppendOrderedChildNode(args_node); + + //top-level stack variable holding argv + args_node->SetMappedChildNode("argv", CreateListOfStringsFromIteratorAndFunction(passthrough_params, + &entity->evaluableNodeManager, [](auto s) { return s; })); + + //top-level stack variable holding path to interpreter + EvaluableNode *interpreter_node = entity->evaluableNodeManager.AllocNode(ENT_STRING); + interpreter_node->SetStringValue(interpreter_path); + args_node->SetMappedChildNode("interpreter", interpreter_node); + + //execute the entity + entity->Execute(0, num_steps_executed, 0, num_nodes_allocated, &write_listeners, print_listener, call_stack); + + //clean up the nodes created here + entity->evaluableNodeManager.FreeNodeTree(call_stack); + + //detect memory leaks for debugging + // the entity should have one reference left, which is the entity's code itself + if(entity->evaluableNodeManager.GetNumberOfNodesReferenced() > 1) + { + auto &temp_used_nodes = entity->evaluableNodeManager.GetNodesReferenced(); + std::cerr << "Error: memory leak." << std::endl; + + if(Platform_IsDebuggerPresent()) + { + std::cerr << "The following temporary nodes are still in use : " << std::endl; + for(auto &[used_node, _] : temp_used_nodes) + { + std::cerr << "Item:" << std::endl; + std::cerr << Parser::Unparse(used_node, &entity->evaluableNodeManager); + } + } + } + + PrintProfilingInformationIfApplicable(); + + if(Platform_IsDebuggerPresent()) + { + auto nodes_used = entity->evaluableNodeManager.GetNumberOfUsedNodes(); + auto nodes_free = entity->evaluableNodeManager.GetNumberOfUnusedNodes(); + std::cout << "Root Entity nodes in use: " << nodes_used << "/" << (nodes_used + nodes_free) << std::endl; + } + + for(auto &ewl : write_listeners) + delete ewl; + if(print_listener != nullptr) + delete print_listener; + + if(Platform_IsDebuggerPresent()) + { + delete entity; + + auto num_strings_used = string_intern_pool.GetNumDynamicStringsInUse(); + //there should always at least be the empty string + if(num_strings_used > 0) + { + std::cerr << "ERROR: Num strings still in use: " << num_strings_used << std::endl; + std::vector in_use = string_intern_pool.GetNonStaticStringsInUse(); + for(auto &s : in_use) + std::cerr << '"' << s << '"' << std::endl; + } + + std::cout << "Memory reclaimation complete." 
<< std::endl; + } + + return 0; + } +} diff --git a/src/Amalgam/AmalgamTrace.cpp b/src/Amalgam/AmalgamTrace.cpp new file mode 100644 index 00000000..a8f99171 --- /dev/null +++ b/src/Amalgam/AmalgamTrace.cpp @@ -0,0 +1,143 @@ +//project headers: +#include "Amalgam.h" +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "Entity.h" +#include "EntityExternalInterface.h" +#include "EntityQueries.h" +#include "EvaluableNode.h" +#include "Parser.h" +#include "PerformanceProfiler.h" +#include "PlatformSpecific.h" +#include "RandomStream.h" + +//system headers: +#include +#include +#include + +extern EntityExternalInterface entint; + +const std::string SUCCESS_RESPONSE = std::string("success"); +const std::string FAILURE_RESPONSE = std::string("failure"); + +//runs a loop processing commands in the same manner as the API +// Message structure: [ADDITIONAL ARGS] [DATA] +int RunAmalgamTrace(std::istream *in_stream, std::ostream *out_stream, std::string &random_seed) +{ + if(in_stream == nullptr) + return 0; + + RandomStream random_stream(random_seed); + + //set default store to be compressed + asset_manager.defaultEntityExtension = FILE_EXTENSION_COMPRESSED_AMALGAM_CODE; + + // Define all these variables outside the main loop to reduce memory churn. + std::string input; + std::string handle; + std::string label; + std::string command; + std::string data; + std::string load_contained; + std::string persistent; + std::string print_listener_path; + std::string transaction_listener_path; + std::string response; + + // program loop + while(in_stream->good()) + { + // read external input + getline(*in_stream, input, '\n'); + + command = StringManipulation::RemoveFirstWord(input); + + // perform specified operation + if(command == "LOAD_ENTITY") + { + // used in LOAD_ENTITY to account for filenames with spaces. + std::vector command_tokens = Platform_SplitArgString(input); + + if(command_tokens.size() >= 4) + { + handle = command_tokens[0]; + data = command_tokens[1]; // path to amlg file + persistent = command_tokens[2]; + load_contained = command_tokens[3]; + + if(command_tokens.size() >= 5) + print_listener_path = command_tokens[4]; + else + print_listener_path = ""; + + if(command_tokens.size() >= 6) + transaction_listener_path = command_tokens[5]; + else + transaction_listener_path = ""; + + std::string new_rand_seed = random_stream.CreateOtherStreamStateViaString("trace"); + bool result = entint.LoadEntity(handle, data, persistent == "true", load_contained == "true", transaction_listener_path, print_listener_path, new_rand_seed); + response = result ? SUCCESS_RESPONSE : FAILURE_RESPONSE; + } + else + { + //Insufficient arguments for LOAD_ENTITY + response = FAILURE_RESPONSE; + } + } + else if(command == "SET_JSON_TO_LABEL") + { + handle = StringManipulation::RemoveFirstWord(input); + label = StringManipulation::RemoveFirstWord(input); + data = input; // json data + bool result = entint.SetJSONToLabel(handle, label, data); + response = result ? 
SUCCESS_RESPONSE : FAILURE_RESPONSE; + } + else if(command == "GET_JSON_FROM_LABEL") + { + handle = StringManipulation::RemoveFirstWord(input); + label = StringManipulation::RemoveFirstWord(input); + response = entint.GetJSONFromLabel(handle, label); + } + else if(command == "EXECUTE_ENTITY_JSON") + { + handle = StringManipulation::RemoveFirstWord(input); + label = StringManipulation::RemoveFirstWord(input); + data = input; // json data + response = entint.ExecuteEntityJSON(handle, label, data); + } + else if(command == "SET_RANDOM_SEED") + { + handle = StringManipulation::RemoveFirstWord(input); + data = input; + bool result = entint.SetRandomSeed(handle, data); + response = result ? SUCCESS_RESPONSE : FAILURE_RESPONSE; + } + else if(command == "VERSION") + { + response = AMALGAM_VERSION_STRING; + } + else if(command == "EXIT") + { + break; + } + else if(command == "#" || command == "") + { + // Commment or blank lines used in execution dumps. + } + else + { + response = "Unknown command: " + command; + } + + // return response + if(out_stream != nullptr) + *out_stream << response << std::endl; + } + + if(Platform_IsDebuggerPresent()) + std::cout << "Trace file complete." << std::endl; + + return 0; +} diff --git a/src/Amalgam/AssetManager.cpp b/src/Amalgam/AssetManager.cpp new file mode 100644 index 00000000..3155baba --- /dev/null +++ b/src/Amalgam/AssetManager.cpp @@ -0,0 +1,475 @@ +//project headers: +#include "BinaryPacking.h" +#include "AssetManager.h" +#include "EvaluableNode.h" +#include "FilenameEscapeProcessor.h" +#include "FileSupportCSV.h" +#include "FileSupportJSON.h" +#include "FileSupportYAML.h" +#include "PlatformSpecific.h" + +//system headers: +#include +#include +#include +#include +#include + +AssetManager asset_manager; + +EvaluableNodeReference AssetManager::LoadResourcePath(std::string &resource_path, + std::string &resource_base_path, std::string &file_type, EvaluableNodeManager *enm, bool escape_filename) +{ + //get file path based on the file loaded + std::string path, file_base, extension; + Platform_SeparatePathFileExtension(resource_path, path, file_base, extension); + resource_base_path = path + file_base; + + //escape the string if necessary, otherwise just use the regular one + std::string processed_resource_path; + if(escape_filename) + { + resource_base_path = path + FilenameEscapeProcessor::SafeEscapeFilename(file_base); + processed_resource_path = resource_base_path + "." + extension; + } + else + { + resource_base_path = path + file_base; + processed_resource_path = resource_path; + } + + if(file_type == "") + file_type = extension; + + //load this entity based on file_type + if(file_type == FILE_EXTENSION_AMALGAM || file_type == FILE_EXTENSION_AMLG_METADATA) + { + auto [code, code_success] = Platform_OpenFileAsString(processed_resource_path); + if(!code_success) + { + if(file_type == FILE_EXTENSION_AMALGAM) + std::cerr << code << std::endl; + return EvaluableNodeReference::Null(); + } + + //check for byte order mark for UTF-8 that may optionally appear at the beginning of the file. + // If it is present, remove it. No other encoding standards besides ascii and UTF-8 are currently permitted. 
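	//For reference, a short hypothetical session for RunAmalgamTrace() in AmalgamTrace.cpp above, one command
	// per line, first word as the command (handle, path, label, and JSON values are illustrative only):
	//
	//	LOAD_ENTITY handle1 ./model.caml false true
	//	SET_JSON_TO_LABEL handle1 some_label {"x":1}
	//	EXECUTE_ENTITY_JSON handle1 some_action {"y":2}
	//	VERSION
	//	EXIT
	//
	// Every command except EXIT (which simply ends the loop) writes one response line back: "success" or
	// "failure" for LOAD_ENTITY, SET_JSON_TO_LABEL, and SET_RANDOM_SEED; the resulting JSON for
	// GET_JSON_FROM_LABEL and EXECUTE_ENTITY_JSON; and the version string for VERSION.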
+ if(code.size() >= 3) + { + if(static_cast(code[0]) == 0xEF && static_cast(code[1]) == 0xBB && static_cast(code[2]) == 0xBF) + code.erase(0, 3); + } + + if(!debugSources) + return Parser::Parse(code, enm); + else + return Parser::Parse(code, enm, &resource_path); + } + else if(file_type == FILE_EXTENSION_JSON) + return EvaluableNodeReference(EvaluableNodeJSONTranslation::Load(processed_resource_path, enm), true); + else if(file_type == FILE_EXTENSION_YAML) + return EvaluableNodeReference(EvaluableNodeYAMLTranslation::Load(processed_resource_path, enm), true); + else if(file_type == FILE_EXTENSION_CSV) + return EvaluableNodeReference(FileSupportCSV::Load(processed_resource_path, enm), true); + else if(file_type == FILE_EXTENSION_COMPRESSED_STRING_LIST) + { + BinaryData compressed_data; + if(!LoadFileToBuffer(processed_resource_path, compressed_data)) + return EvaluableNodeReference::Null(); + + OffsetIndex cur_offset = 0; + auto strings = DecompressStrings(compressed_data, cur_offset); + + //transform the decompressed strings into a list of strings + EvaluableNode *list_of_strings = enm->AllocListNodeWithOrderedChildNodes(ENT_STRING, strings.size()); + auto &list_ocn = list_of_strings->GetOrderedChildNodes(); + for(size_t i = 0; i < strings.size(); i++) + list_ocn[i]->SetStringValue(strings[i]); + + return EvaluableNodeReference(list_of_strings, true); + } + else if(file_type == FILE_EXTENSION_COMPRESSED_AMALGAM_CODE) + { + BinaryData compressed_data; + if(!LoadFileToBuffer(processed_resource_path, compressed_data)) + return EvaluableNodeReference::Null(); + + OffsetIndex cur_offset = 0; + auto strings = DecompressStrings(compressed_data, cur_offset); + if(strings.size() == 0) + return EvaluableNodeReference::Null(); + + if(!debugSources) + return Parser::Parse(strings[0], enm); + else + return Parser::Parse(strings[0], enm, &resource_path); + } + else //just load the file as a string + { + std::string s; + if(LoadFileToBuffer(processed_resource_path, s)) + return EvaluableNodeReference(enm->AllocNode(ENT_STRING, s), true); + else + return EvaluableNodeReference::Null(); + } +} + +bool AssetManager::StoreResourcePath(EvaluableNode *code, std::string &resource_path, + std::string &resource_base_path, std::string &file_type, EvaluableNodeManager *enm, bool escape_filename, bool sort_keys) +{ + //get file path based on the file being stored + std::string path, file_base, extension; + Platform_SeparatePathFileExtension(resource_path, path, file_base, extension); + + //escape the string if necessary, otherwise just use the regular one + std::string processed_resource_path; + if(escape_filename) + { + resource_base_path = path + FilenameEscapeProcessor::SafeEscapeFilename(file_base); + processed_resource_path = resource_base_path + "." 
+ extension; + } + else + { + resource_base_path = path + file_base; + processed_resource_path = resource_path; + } + + if(file_type == "") + file_type = extension; + + //store the entity based on file_type + if(file_type == FILE_EXTENSION_AMALGAM || file_type == FILE_EXTENSION_AMLG_METADATA) + { + std::ofstream outf(processed_resource_path, std::ios::out | std::ios::binary); + if(!outf.good()) + return false; + + std::string code_string = Parser::Unparse(code, enm, true, true, sort_keys); + outf.write(code_string.c_str(), code_string.size()); + outf.close(); + + return true; + } + else if(file_type == FILE_EXTENSION_JSON) + return EvaluableNodeJSONTranslation::Store(code, processed_resource_path, enm, sort_keys); + else if(file_type == FILE_EXTENSION_YAML) + return EvaluableNodeYAMLTranslation::Store(code, processed_resource_path, enm, sort_keys); + else if(file_type == FILE_EXTENSION_CSV) + return FileSupportCSV::Store(code, processed_resource_path, enm); + else if(file_type == FILE_EXTENSION_COMPRESSED_STRING_LIST) + { + //translate list of strings into a map required for compression + size_t cur_index = 0; + CompactHashMap string_map; + if(code != nullptr) + { + for(auto &cn : code->GetOrderedChildNodes()) + string_map[EvaluableNode::ToString(cn)] = cur_index++; + } + + //compress and store + BinaryData compressed_data = CompressStrings(string_map); + if(StoreFileFromBuffer(processed_resource_path, compressed_data)) + return EvaluableNodeReference(enm->AllocNode(ENT_TRUE), true); + else + return EvaluableNodeReference::Null(); + } + else if(file_type == FILE_EXTENSION_COMPRESSED_AMALGAM_CODE) + { + std::string code_string = Parser::Unparse(code, enm, false, true, sort_keys); + + //transforminto format needed for compression + CompactHashMap string_map; + string_map[code_string] = 0; + + //compress and store + BinaryData compressed_data = CompressStrings(string_map); + if(StoreFileFromBuffer(processed_resource_path, compressed_data)) + return EvaluableNodeReference(enm->AllocNode(ENT_TRUE), true); + else + return EvaluableNodeReference::Null(); + } + else //binary string + { + std::string s = EvaluableNode::ToString(code); + if(StoreFileFromBuffer(processed_resource_path, s)) + return EvaluableNodeReference(enm->AllocNode(ENT_TRUE), true); + else + return EvaluableNodeReference::Null(); + } + + return false; +} + +Entity *AssetManager::LoadEntityFromResourcePath(std::string &resource_path, std::string &file_type, + bool persistent, bool load_contained_entities, bool escape_filename, bool escape_contained_filenames, std::string default_random_seed) +{ + std::string resource_base_path; + Entity *new_entity = new Entity(); + + EvaluableNodeReference code = LoadResourcePath(resource_path, resource_base_path, file_type, &new_entity->evaluableNodeManager, escape_filename); + if(code == nullptr) + { + delete new_entity; + return nullptr; + } + new_entity->SetRoot(code, true); + + //load any metadata like random seed + std::string metadata_filename = resource_base_path + "." 
+ FILE_EXTENSION_AMLG_METADATA; + std::string metadata_base_path; + std::string metadata_extension; + EvaluableNode *metadata = LoadResourcePath(metadata_filename, metadata_base_path, metadata_extension, &new_entity->evaluableNodeManager, escape_filename); + if(metadata != nullptr) + { + if(EvaluableNode::IsAssociativeArray(metadata)) + { + EvaluableNode **seed = metadata->GetMappedChildNode(ENBISI_rand_seed); + if(seed != nullptr) + default_random_seed = EvaluableNode::ToString(*seed); + } + } + + new_entity->SetRandomState(default_random_seed, true); + + if(persistent) + { + #ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock lock(persistentEntitiesMutex); + #endif + persistentEntities[new_entity] = resource_path; + } + + //load contained entities + if(load_contained_entities) + { + //iterate over all files in directory + resource_base_path.append("/"); + std::vector file_names; + Platform_GetFileNamesOfType(file_names, resource_base_path, file_type); + for(auto &f : file_names) + { + std::string ce_path, ce_file_base, ce_extension; + Platform_SeparatePathFileExtension(f, ce_path, ce_file_base, ce_extension); + + std::string entity_name; + if(escape_contained_filenames) + entity_name = FilenameEscapeProcessor::SafeUnescapeFilename(ce_file_base); + else + entity_name = ce_file_base; + + + //don't escape filename again because it's already escaped in this loop + std::string default_seed = new_entity->CreateOtherRandomStreamStateViaString(entity_name); + std::string contained_resource_path = resource_base_path + ce_file_base + "." + ce_extension; + Entity *contained_entity = LoadEntityFromResourcePath(contained_resource_path, file_type, + false, true, false, escape_contained_filenames, default_seed); + + new_entity->AddContainedEntity(contained_entity, entity_name); + } + } + + return new_entity; +} + +bool AssetManager::StoreEntityToResourcePath(Entity *entity, std::string &resource_path, std::string &file_type, + bool update_persistence_location, bool store_contained_entities, bool escape_filename, bool escape_contained_filenames, bool sort_keys) +{ + if(entity == nullptr) + return false; + + std::string resource_base_path; + bool all_stored_successfully = AssetManager::StoreResourcePath(entity->GetRoot(), + resource_path, resource_base_path, file_type, &entity->evaluableNodeManager, escape_filename, sort_keys); + + //store any metadata like random seed + std::string metadata_filename = resource_base_path + "." + FILE_EXTENSION_AMLG_METADATA; + EvaluableNode en_assoc(ENT_ASSOC); + EvaluableNode en_rand_seed(ENT_STRING, entity->GetRandomState()); + en_assoc.SetMappedChildNode(ENBISI_rand_seed, &en_rand_seed); + + std::string metadata_base_path; + std::string metadata_extension; + //don't reescape the path here, since it has already been done + StoreResourcePath(&en_assoc, metadata_filename, metadata_base_path, metadata_extension, &entity->evaluableNodeManager, false, sort_keys); + + //store contained entities + if(store_contained_entities && entity->GetContainedEntities().size() > 0) + { + //create directory in case it doesn't exist + std::filesystem::create_directory(resource_base_path); + + //store any contained entities + resource_base_path.append("/"); + for(auto contained_entity : entity->GetContainedEntities()) + { + std::string new_resource_path; + if(escape_contained_filenames) + { + const std::string &ce_escaped_filename = FilenameEscapeProcessor::SafeEscapeFilename(contained_entity->GetId()); + new_resource_path = resource_base_path + ce_escaped_filename + "." 
+ file_type; + } + else + new_resource_path = resource_base_path + contained_entity->GetId() + "." + file_type; + + //don't escape filename again because it's already escaped in this loop + StoreEntityToResourcePath(contained_entity, new_resource_path, file_type, false, true, false, escape_contained_filenames, sort_keys); + } + } + + if(update_persistence_location) + { + #ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock lock(persistentEntitiesMutex); + #endif + persistentEntities[entity] = resource_base_path + "." + file_type; //use escaped string + } + + return all_stored_successfully; +} + +void AssetManager::UpdateEntity(Entity *entity) +{ +#ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock lock(persistentEntitiesMutex); +#endif + //early out if no persistent entities + if(persistentEntities.size() == 0) + return; + + Entity *cur = entity; + std::string slice_path; + std::string filename; + std::string extension; + std::string traversal_path; + + while(cur != nullptr) + { + const auto &pe = persistentEntities.find(cur); + if(pe != end(persistentEntities)) + { + Platform_SeparatePathFileExtension(pe->second, slice_path, filename, extension); + std::string new_path = slice_path + filename + traversal_path + "." + extension; + + //the outermost file is already escaped, but persistent entities must be recursively escaped + StoreEntityToResourcePath(entity, new_path, extension, false, false, false, true, false); + } + + //don't need to continue and allocate extra traversal path if already at outermost entity + Entity *cur_container = cur->GetContainer(); + if(cur_container == nullptr) + break; + + std::string escaped_entity_id = FilenameEscapeProcessor::SafeEscapeFilename(cur->GetId()); + traversal_path = "/" + escaped_entity_id + traversal_path; + cur = cur_container; + } +} + +void AssetManager::CreateEntity(Entity *entity) +{ + if(entity == nullptr) + return; + +#ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock lock(persistentEntitiesMutex); +#endif + //early out if no persistent entities + if(persistentEntities.size() == 0) + return; + + Entity *cur = entity->GetContainer(); + std::string slice_path; + std::string filename; + std::string extension = defaultEntityExtension; + std::string traversal_path = ""; + std::string escaped_entity_id = FilenameEscapeProcessor::SafeEscapeFilename(entity->GetId()); + std::string id_suffix = "/" + escaped_entity_id + "." 
+ defaultEntityExtension; + while(cur != nullptr) + { + const auto &pe = persistentEntities.find(cur); + if(pe != end(persistentEntities)) + { + Platform_SeparatePathFileExtension(pe->second, slice_path, filename, extension); + //create contained entity directory in case it doesn't currently exist + std::string new_path = slice_path + filename + traversal_path; + std::filesystem::create_directory(new_path); + + new_path += id_suffix; + StoreEntityToResourcePath(entity, new_path, extension, false, true, false, true, false); + } + + //don't need to continue and allocate extra traversal path if already at outermost entity + Entity *cur_container = cur->GetContainer(); + if(cur_container == nullptr) + break; + + escaped_entity_id = FilenameEscapeProcessor::SafeEscapeFilename(cur->GetId()); + traversal_path = "/" + escaped_entity_id + traversal_path; + cur = cur_container; + } +} + +void AssetManager::SetRootPermission(Entity *entity, bool permission) +{ + if(entity == nullptr) + return; + +#ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock lock(rootEntitiesMutex); +#endif + + if(permission) + rootEntities.insert(entity); + else + rootEntities.erase(entity); +} + +void AssetManager::DestroyPersistentEntity(Entity *entity) +{ + Entity *cur = entity; + std::string slice_path; + std::string filename; + std::string extension; + std::string traversal_path; + + //remove it as a persistent entity if it happened to be a direct one (erase won't do anything if it doesn't exist) + persistentEntities.erase(entity); + + //delete any contained entities that are persistent + for(auto contained_entity : entity->GetContainedEntities()) + DestroyPersistentEntity(contained_entity); + + //cover the case if any of this entity's containers were also persisted entities + while(cur != nullptr) + { + const auto &pe = persistentEntities.find(cur); + if(pe != end(persistentEntities)) + { + //get metadata filename + Platform_SeparatePathFileExtension(pe->second, slice_path, filename, extension); + std::string total_filepath = slice_path + filename + traversal_path; + + //delete files + std::filesystem::remove(total_filepath + "." + defaultEntityExtension); + std::filesystem::remove(total_filepath + "." 
+ FILE_EXTENSION_AMLG_METADATA); + + //remove directory and all contents if it exists (command will fail if it doesn't exist) + std::filesystem::remove_all(total_filepath); + } + + std::string escaped_entity_id = FilenameEscapeProcessor::SafeEscapeFilename(cur->GetId()); + traversal_path = "/" + escaped_entity_id + traversal_path; + + cur = cur->GetContainer(); + } +} + +void AssetManager::RemoveRootPermissions(Entity *entity) +{ + //remove permissions on any contained entities + for(auto contained_entity : entity->GetContainedEntities()) + RemoveRootPermissions(contained_entity); + + SetRootPermission(entity, false); +} diff --git a/src/Amalgam/AssetManager.h b/src/Amalgam/AssetManager.h new file mode 100644 index 00000000..c07d7317 --- /dev/null +++ b/src/Amalgam/AssetManager.h @@ -0,0 +1,166 @@ +#pragma once + +//project headers: +#include "Entity.h" +#include "EvaluableNode.h" +#include "HashMaps.h" + +//system headers: +#include +#include + +const std::string FILE_EXTENSION_AMLG_METADATA("mdam"); +const std::string FILE_EXTENSION_AMALGAM("amlg"); +const std::string FILE_EXTENSION_JSON("json"); +const std::string FILE_EXTENSION_YAML("yaml"); +const std::string FILE_EXTENSION_CSV("csv"); +const std::string FILE_EXTENSION_COMPRESSED_STRING_LIST("cstl"); +const std::string FILE_EXTENSION_COMPRESSED_AMALGAM_CODE("caml"); + +class AssetManager; +extern AssetManager asset_manager; + +class AssetManager +{ +public: + AssetManager() + : defaultEntityExtension(FILE_EXTENSION_AMALGAM), debugSources(false), debugMinimal(false) + { } + + //Returns the code to the corresponding entity by resource_path + // sets resource_base_path to the resource path without the extension + //if file_type is not an empty string, it will use the specified file_type instead of the filename's extension + EvaluableNodeReference LoadResourcePath(std::string &resource_path, std::string &resource_base_path, + std::string &file_type, EvaluableNodeManager *enm, bool escape_filename); + + //Stores the code to the corresponding resource path + // sets resource_base_path to the resource path without the extension, and extension accordingly + //if file_type is not an empty string, it will use the specified file_type instead of the filename's extension + static bool StoreResourcePath(EvaluableNode *code, std::string &resource_path, std::string &resource_base_path, + std::string &file_type, EvaluableNodeManager *enm, bool escape_filename, bool sort_keys); + + //Loads an entity, including contained entites, etc. from the resource path specified + //if file_type is not an empty string, it will use the specified file_type instead of the filename's extension + // if persistent is true, then it will keep the resource updated based on any calls to UpdateEntity + //if the resource does not have a metadata file, will use default_random_seed as its seed + Entity *LoadEntityFromResourcePath(std::string &resource_path, std::string &file_type, bool persistent, bool load_contained_entities, + bool escape_filename, bool escape_contained_filenames, std::string default_random_seed); + + //Stores an entity, including contained entites, etc. 
from the resource path specified + //if file_type is not an empty string, it will use the specified file_type instead of the filename's extension + // if persistent is true, then it will keep the resource updated based on any calls to UpdateEntity (will not make not persistent if was previously loaded as persistent) + // returns true if successful + bool StoreEntityToResourcePath(Entity *entity, std::string &resource_path, std::string &file_type, + bool update_persistence_location, bool store_contained_entities, + bool escape_filename, bool escape_contained_filenames, bool sort_keys); + + //Indicates that the entity has been written to or updated, and so if the asset is persistent, the persistent copy should be updated + void UpdateEntity(Entity *entity); + void CreateEntity(Entity *entity); + inline void DestroyEntity(Entity *entity) + { +#ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock lock(persistentEntitiesMutex); +#endif + + RemoveRootPermissions(entity); + + if(persistentEntities.size() > 0) + DestroyPersistentEntity(entity); + } + + //sets the entity's root permission to permission + void SetRootPermission(Entity *entity, bool permission); + + // Checks if this entity or one of its containers is persistent + inline bool IsEntityIndirectlyPersistent(Entity *entity) + { + Entity *cur = entity; + + #ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock lock(persistentEntitiesMutex); + #endif + + while(cur != nullptr) + { + if(persistentEntities.find(cur) != end(persistentEntities)) + return true; + } + + return false; + } + + // Checks if this entity specifically has been loaded as persistent + inline bool IsEntityDirectlyPersistent(Entity *entity) + { return persistentEntities.find(entity) != end(persistentEntities); } + + inline bool DoesEntityHaveRootPermission(Entity *entity) + { + if(entity == nullptr) + return false; + + #ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock lock(rootEntitiesMutex); + #endif + + return rootEntities.find(entity) != end(rootEntities); + } + + //loads filename into the buffer specified by b (of type BufferType of elements BufferElementType), returns true if successful, false if not + template + static bool LoadFileToBuffer(const std::string &filename, BufferType &b) + { + std::ifstream f(filename, std::fstream::binary | std::fstream::in); + + if(!f.good()) + return false; + + f.seekg(0, std::ios::end); + b.reserve(f.tellg()); + f.seekg(0, std::ios::beg); + + b.assign(std::istreambuf_iterator(f), std::istreambuf_iterator()); + return true; + } + + //stores buffer b (of type BufferType of elements BufferElementType) into the filename, returns true if successful, false if not + template + static bool StoreFileFromBuffer(const std::string &filename, BufferType &b) + { + std::ofstream f(filename, std::fstream::binary | std::fstream::out); + if(!f.good()) + return false; + + f.write(reinterpret_cast(&b[0]), sizeof(char) * b.size()); + return true; + } + + //default extension to store new entities + std::string defaultEntityExtension; + + //if true, will enable debugging the sources of loading nodes + bool debugSources; + + //if true, will exclude current position details when stepping + bool debugMinimal; + +private: + + //recursively deletes persistent entities + void DestroyPersistentEntity(Entity *entity); + + //recursively removes root permissions + void RemoveRootPermissions(Entity *entity); + + //entities that need changes stored, and the resource paths to store them + CompactHashMap persistentEntities; + + //entities that have root permissions + 
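	//Note on IsEntityIndirectlyPersistent() above: as written, the while(cur != nullptr) loop never advances
	// cur, so any entity that is not directly persistent would spin forever.  A minimal sketch of the intended
	// walk up the container chain (assuming GetContainer() is the right step, as used in AssetManager.cpp):
	//
	//	while(cur != nullptr)
	//	{
	//		if(persistentEntities.find(cur) != end(persistentEntities))
	//			return true;
	//		cur = cur->GetContainer();
	//	}
	//	return false;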
Entity::EntitySetType rootEntities; + +#ifdef MULTITHREAD_INTERFACE + //mutexes for global data + Concurrency::ReadWriteMutex persistentEntitiesMutex; + Concurrency::ReadWriteMutex rootEntitiesMutex; +#endif +}; diff --git a/src/Amalgam/BinaryPacking.cpp b/src/Amalgam/BinaryPacking.cpp new file mode 100644 index 00000000..664fae9b --- /dev/null +++ b/src/Amalgam/BinaryPacking.cpp @@ -0,0 +1,467 @@ +//project headers: +#include "BinaryPacking.h" + +//system headers: +#include +#include +#include + +void UnparseIndexToCompactIndexAndAppend(BinaryData &bd_out, OffsetIndex oi) +{ + //start by stripping off the data of the least significant 7 bits + uint8_t cur_byte = (oi & 0x7F); + oi >>= 7; + + //as long as there are more bits in the index + while(oi != 0) + { + //mark with most significant bit + cur_byte |= 0x80; + bd_out.push_back(cur_byte); + + //take off another 7 bits + cur_byte = (oi & 0x7F); + oi >>= 7; + } + bd_out.push_back(cur_byte); +} + +OffsetIndex ParseCompactIndexToIndexAndAdvance(BinaryData &bd, OffsetIndex &bd_offset) +{ + OffsetIndex index = 0; + for(int i = 0; bd_offset < bd.size(); i++, bd_offset++) + { + uint8_t cur_byte = bd[bd_offset]; + + //if the most significant bit is set, then roll this on to the existing index + bool last_byte = true; + if(cur_byte & 0x80) + { + last_byte = false; + cur_byte &= 0x7F; + } + + //put the 7 bits onto the index + index |= (static_cast(cur_byte) << (7 * i)); + + if(last_byte) + { + //advance for last byte + bd_offset++; + break; + } + } + + return index; +} + +//Huffman Encoding implementation for compressing and decompressing data +template +class HuffmanTree +{ +public: + constexpr HuffmanTree(value_type value, size_t value_frequency, size_t node_index, + HuffmanTree *left = nullptr, HuffmanTree *right = nullptr) + : value(value), valueFrequency(value_frequency), nodeIndex(node_index), left(left), right(right) + { } + + ~HuffmanTree() + { + delete left; + delete right; + } + + //number of bits per value based on the number of bytes long value_type is + static constexpr int bitsPerValue = 8 * sizeof(value_type); + + static HuffmanTree *BuildTreeFromValueFrequencies( + std::array::max() + 1> &byte_frequencies) + { + size_t cur_node_index = 0; + + //start by building the leaf nodes + std::priority_queue *, + std::vector *>, HuffmanTree::Compare > alphabet_heap; + + //create all the leaf nodes and add them to the priority queue + for(size_t i = 0; i < byte_frequencies.size(); i++) + { + auto leaf = new HuffmanTree(static_cast(i), byte_frequencies[i], cur_node_index++); + alphabet_heap.push(leaf); + } + + //Merge leaf nodes with lowest values until have just one at the top + HuffmanTree *huffman_tree = nullptr; + while(alphabet_heap.size() > 1) + { + auto left = alphabet_heap.top(); + alphabet_heap.pop(); + auto right = alphabet_heap.top(); + alphabet_heap.pop(); + + //since non-leaf nodes aren't used for encoding, just use the value 0 + huffman_tree = new HuffmanTree(0, left->valueFrequency + right->valueFrequency, cur_node_index++, left, right); + alphabet_heap.push(huffman_tree); + } + + return huffman_tree; + } + + //for sorting HuffmanTree nodes by frequency + class Compare + { + public: + constexpr bool operator()(HuffmanTree *a, HuffmanTree *b) + { + //if valueFrequency is the same for both values, break tie by the value itself + // to ensure consistent ordering across platforms and heap implementations + if(a->valueFrequency == b->valueFrequency) + { + //if values are equal, break ties by node index + if(a->value == b->value) + 
return a->nodeIndex > b->nodeIndex; + + return a->value > b->value; + } + return a->valueFrequency > b->valueFrequency; + } + }; + + //looks up the next value in the tree based from the bit string in bd from start_index up until end_index + //increments start_index based on the length of the code consumed + inline value_type LookUpCode(BinaryData &bd, OffsetIndex &start_index, OffsetIndex end_index) + { + auto node = this; + + OffsetIndex cur_byte = (start_index / bitsPerValue); + OffsetIndex cur_bit = (start_index % bitsPerValue); + + while(start_index < end_index) + { + //if leaf node, then return value + if(node->left == nullptr) + return node->value; + + if(bd[cur_byte] & (1 << cur_bit) ) + node = node->right; + else + node = node->left; + + start_index++; + cur_bit++; + if(cur_bit == bitsPerValue) + { + cur_bit = 0; + cur_byte++; + } + } + + //if leaf node, then return value; need this again incase used up last bits + if(node->left == nullptr) + return node->value; + + //shouldn't make it here -- ran out of bits + return 0; + } + + //the value of this node in the HuffmanTree and its frequency + value_type value; + size_t valueFrequency; + + //node index used for breaking ties on value and valueFrequency to ensure + // that Huffman trees are always generated identically regardless of priority queue implementation + size_t nodeIndex; + + //rest of the tree + HuffmanTree *left; + HuffmanTree *right; +}; + +//class to compress and decompress bundles of strings +class StringCodec +{ +public: + + const static size_t NUM_UINT8_VALUES = std::numeric_limits::max() + 1; + + StringCodec(std::array &byte_frequencies) + { + //build the huffman_tree based on the byte frequencies + huffmanTree = HuffmanTree::BuildTreeFromValueFrequencies(byte_frequencies); + } + + ~StringCodec() + { + if(huffmanTree != nullptr) + delete huffmanTree; + } + + inline BinaryData EncodeString(BinaryData &uncompressed_data) + { + //build lookup table from huffman_tree + + //the code for each possibly representable value + // for example, if 44 is the boolean vector 10, then it will have 10 at the 44th index + //all valueCodes are initialized to an empty boolean array + std::array, NUM_UINT8_VALUES> valueCodes; + + //keep a double-ended queue to traverse the tree, building up the codes for each part of the tree + std::deque *, std::vector>> remaining_nodes; + remaining_nodes.push_back(std::make_pair(huffmanTree, std::vector())); + + //while more tree to convert + while(!remaining_nodes.empty()) + { + auto node = remaining_nodes.front().first; + //explicitly make a copy to make sure there's not a reference being kept + std::vector code(remaining_nodes.front().second); + remaining_nodes.pop_front(); + + auto left = node->left; + auto right = node->right; + + //if not leaf node (a Huffman Tree node is either full or not) + if(left != nullptr) + { + //make another copy of the code for the other child node + std::vector code_copy(code); + + //append a 0 for left, 1 for right + code.push_back(0); + remaining_nodes.push_back(std::make_pair(left, code)); + code_copy.push_back(1); + remaining_nodes.push_back(std::make_pair(right, code_copy)); + } + else //leaf node + { + valueCodes[node->value] = code; + } + } + + //encode the data and store in compressed_data + BinaryData compressed_data; + //reserve some, probably not enough, but enough to get started + compressed_data.reserve(1 + uncompressed_data.size() / 4); + + //the first byte stores the number of extra bits in the last byte, so skip it for encoding + OffsetIndex ending_bit 
= 8; + OffsetIndex cur_byte = 1; + OffsetIndex cur_bit = 0; + + for(auto &c : uncompressed_data) + { + auto &value = valueCodes[c]; + OffsetIndex num_bits_to_add = value.size(); + + //make sure there are enough bytes to hold everything + // if one extra bit, then need a full extra byte, so add 7 bits to round up + ending_bit += num_bits_to_add; + compressed_data.resize((ending_bit + 7) / 8); + + for(auto bit : value) + { + //compressed_data is already initialized to zeros, so only need to set if true + if(bit) + compressed_data[cur_byte] |= (1 << cur_bit); + + cur_bit++; + if(cur_bit == 8) + { + cur_bit = 0; + cur_byte++; + } + } + } + + //store number of extra bits in first byte + compressed_data[0] = (ending_bit % 8); + + return compressed_data; + } + + inline BinaryData DecodeString(BinaryData &compressed_data) + { + //need at least one byte to represent the number of extra bits and another byte of actual value + if(compressed_data.size() < 2) + return BinaryData(); + + //count out all the potentially available bits + OffsetIndex end_bit = 8 * compressed_data.size(); + + //number of extra bits is stored in the first byte + if(compressed_data[0] != 0) + { + //if there is any number besides 0, then we need to remove 8 bits and add on whatever remains + end_bit -= 8; + end_bit += compressed_data[0]; + } + //skip the first byte + OffsetIndex start_bit = 8; + + //decompress the data + BinaryData uncompressed_data; + while(start_bit < end_bit) + uncompressed_data.push_back(huffmanTree->LookUpCode(compressed_data, start_bit, end_bit)); + + return uncompressed_data; + } + + //counts the number of bytes within bd for each value + //returns an array where each index represents each of the possible NUM_INT8_VALUES values, and the value at each is the number found + static std::array GetByteFrequencies(BinaryData &bd) + { + std::array value_counts{}; //initialize to zero with value-initialization {}'s + for(auto &b : bd) + value_counts[b]++; + + //get maximal count for any value + size_t max_count = 0; + for(auto &v : value_counts) + max_count = std::max(max_count, v); + + std::array normalized_value_counts{}; //initialize to zero with value-initialization {}'s + for(size_t i = 0; i < NUM_UINT8_VALUES; i++) + { + if(value_counts[i] == 0) + continue; + normalized_value_counts[i] = std::max(static_cast(255 * value_counts[i] / max_count), static_cast(1)); //make sure it has at least a value of 1 to be represented + } + + return normalized_value_counts; + } + + //Huffman tree to build and store between calls + HuffmanTree *huffmanTree; +}; + +BinaryData CompressStrings(CompactHashMap &string_map) +{ + //transform string map into vector and keep track of the total size + std::vector strings(string_map.size()); + size_t concatenated_strings_size = 0; + for(auto &[s, s_size] : string_map) + { + //make sure the index is valid before placing string in + if(s_size < strings.size()) + { + strings[s_size] = s; + concatenated_strings_size += s.size(); + } + } + + //concatenate all strings + BinaryData concatenated_strings; + concatenated_strings.reserve(concatenated_strings_size); + for(auto &s : strings) + concatenated_strings.insert(end(concatenated_strings), begin(s), end(s)); + + BinaryData encoded_string_library; + encoded_string_library.reserve(2 * StringCodec::NUM_UINT8_VALUES); //reserve enough to two entries for every value in the worst case; this will be expanded later + + ////////// + //compress the string + + //create and store the frequency table for each possible byte value + auto byte_frequencies 
= StringCodec::GetByteFrequencies(concatenated_strings); + for(size_t i = 0; i < StringCodec::NUM_UINT8_VALUES; i++) + { + //write value + encoded_string_library.push_back(byte_frequencies[i]); + + //if zero, then run-length encoding compress + if(byte_frequencies[i] == 0) + { + //count the number of additional zeros until next nonzero + uint8_t num_additional_zeros = 0; + while(i + 1 < StringCodec::NUM_UINT8_VALUES && byte_frequencies[i + 1] == 0) + { + num_additional_zeros++; + i++; + } + encoded_string_library.push_back(num_additional_zeros); + //next loop iteration will increment i and count the first zero + continue; + } + } + + //compress string + StringCodec ssc(byte_frequencies); + BinaryData encoded_strings = ssc.EncodeString(concatenated_strings); + + //write out compressed string + UnparseIndexToCompactIndexAndAppend(encoded_string_library, encoded_strings.size()); + encoded_string_library.resize(encoded_string_library.size() + encoded_strings.size()); + std::copy(begin(encoded_strings), end(encoded_strings), end(encoded_string_library) - encoded_strings.size()); + + //write out number of individual strings + UnparseIndexToCompactIndexAndAppend(encoded_string_library, strings.size()); + + //write out string offsets + size_t cur_string_end_offset = 0; + for(auto &s : strings) + { + cur_string_end_offset += s.size(); + UnparseIndexToCompactIndexAndAppend(encoded_string_library, cur_string_end_offset); + } + + return encoded_string_library; +} + +std::vector DecompressStrings(BinaryData &encoded_string_library, OffsetIndex &cur_offset) +{ + //return value + std::vector strings; + + ///////// + //decompress the string + + //read the frequency table for each possible byte value + std::array byte_frequencies{}; //initialize to zeros + for(size_t i = 0; i < StringCodec::NUM_UINT8_VALUES && cur_offset < encoded_string_library.size(); i++) + { + byte_frequencies[i] = encoded_string_library[cur_offset++]; + + //if 0, then run-length encoded + if(byte_frequencies[i] == 0) + { + //fill in that many zeros, but don't write beyond buffer + for(uint8_t num_additional_zeros = encoded_string_library[cur_offset++]; num_additional_zeros > 0 && i < StringCodec::NUM_UINT8_VALUES; num_additional_zeros--, i++) + byte_frequencies[i] = 0; + } + } + + //read encoded string + size_t encoded_strings_size = ParseCompactIndexToIndexAndAdvance(encoded_string_library, cur_offset); + //check if size past end of buffer + if(cur_offset + encoded_strings_size >= encoded_string_library.size()) + return strings; + BinaryData encoded_strings(begin(encoded_string_library) + cur_offset, begin(encoded_string_library) + cur_offset + encoded_strings_size); + cur_offset += encoded_strings_size; + + //decode compressed string buffer + StringCodec ssc(byte_frequencies); + BinaryData concatenated_strings = ssc.DecodeString(encoded_strings); + + //read number of individual strings + size_t num_strings = ParseCompactIndexToIndexAndAdvance(encoded_string_library, cur_offset); + strings.resize(num_strings); + + //read string offsets + size_t cur_string_start_offset = 0; + for(size_t i = 0; i < num_strings; i++) + { + //get the string end + size_t cur_string_end_offset = ParseCompactIndexToIndexAndAdvance(encoded_string_library, cur_offset); + + size_t max_copy_offset = end(concatenated_strings) - begin(concatenated_strings); + if(cur_string_end_offset > max_copy_offset) + cur_string_end_offset = max_copy_offset; + + //copy over the string + strings[i].assign(begin(concatenated_strings) + cur_string_start_offset, 
begin(concatenated_strings) + cur_string_end_offset); + + cur_string_start_offset = cur_string_end_offset; + } + + return strings; +} diff --git a/src/Amalgam/BinaryPacking.h b/src/Amalgam/BinaryPacking.h new file mode 100644 index 00000000..42787ccc --- /dev/null +++ b/src/Amalgam/BinaryPacking.h @@ -0,0 +1,24 @@ +#pragma once + +//project headers: +#include "HashMaps.h" + +//system headers: +#include +#include + +typedef uint64_t OffsetIndex; + +typedef std::vector BinaryData; + +//Appends the offset index oi to BinaryData +void UnparseIndexToCompactIndexAndAppend(BinaryData &bd_out, OffsetIndex oi); + +//Parses the BinaryData starting from the offset bd_offset until it has a full index or has reached the end of the binary data. bd_offset is advanced to the end of the +OffsetIndex ParseCompactIndexToIndexAndAdvance(BinaryData &bd, OffsetIndex &bd_offset); + +//given string_map, map of string to index, where the indices are of the range from 0 to string_map.size(), compresses the strings into BinaryData +BinaryData CompressStrings(CompactHashMap &string_map); + +//given encoded_string_library starting an cur_offset, advances cur_offset to the end of the encoded_string_library and returns a vector of strings decompressed from the encoded_string_library +std::vector DecompressStrings(BinaryData &encoded_string_library, OffsetIndex &cur_offset); diff --git a/src/Amalgam/Concurrency.cpp b/src/Amalgam/Concurrency.cpp new file mode 100644 index 00000000..4bd078ae --- /dev/null +++ b/src/Amalgam/Concurrency.cpp @@ -0,0 +1,49 @@ +//project headers: +#include "Concurrency.h" +#include "PlatformSpecific.h" + +#if defined(MULTITHREAD_SUPPORT) +ThreadPool Concurrency::threadPool; + +size_t _max_num_threads = std::thread::hardware_concurrency(); +#endif + +#if defined(_OPENMP) +//default with half the number of threads on the system (since Amalgam typically is bottlenecked by bus bandwidth) +size_t _max_num_threads = std::thread::hardware_concurrency() / 2; +#endif + +#if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP) + +size_t Concurrency::GetMaxNumThreads() +{ + return _max_num_threads; +} + +void Concurrency::SetMaxNumThreads(size_t max_num_threads) +{ + if(max_num_threads > 0) + _max_num_threads = max_num_threads; + else + { +#ifdef MULTITHREAD_SUPPORT + _max_num_threads = std::thread::hardware_concurrency(); +#else //_OPENMP + //half rounded up if an odd number for some reason + _max_num_threads = (std::thread::hardware_concurrency() + 1) / 2; +#endif + } + +#ifdef _OPENMP + int num_threads = static_cast(_max_num_threads); + if(num_threads > 0) + omp_set_num_threads(num_threads); +#endif + +#ifdef MULTITHREAD_SUPPORT + threadPool.ChangeThreadPoolSize(_max_num_threads); +#endif + +} + +#endif diff --git a/src/Amalgam/Concurrency.h b/src/Amalgam/Concurrency.h new file mode 100644 index 00000000..2966c729 --- /dev/null +++ b/src/Amalgam/Concurrency.h @@ -0,0 +1,87 @@ +#pragma once + +//if MULTITHREAD_SUPPORT is defined, compiles code with multithreaded support, requires a C++0x17 or newer compiler +//MULTITHREAD_SUPPORT means multithreading will be enabled everywhere, including the appropriate locks +//MULTITHREAD_INTERFACE means that multithreading will be enabled only for the interface, +// which means that multithreaded applications can call this library. This is a subset of MULTITHREAD_SUPPORT +//MULTITHREAD_ENTITY_CALL_MUTEX will only allow one call per entity as an external library. 
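As a usage illustration (not part of the diff), a host application built with one of these threading modes can cap the number of worker threads through the Concurrency API added in Concurrency.cpp above (SetMaxNumThreads / GetMaxNumThreads); passing zero restores the heuristic default. The function below is a hypothetical caller written for this note, not code from the repository.

#include <cstddef>
#include "Concurrency.h"

//hypothetical caller, for illustration only
void ConfigureThreadingExample()
{
#if defined(MULTITHREAD_SUPPORT) || defined(_OPENMP)
	//cap the process at four worker threads
	Concurrency::SetMaxNumThreads(4);

	//later, restore the default (hardware_concurrency, or half of it under OpenMP)
	Concurrency::SetMaxNumThreads(0);

	size_t active_threads = Concurrency::GetMaxNumThreads();
	(void)active_threads;
#endif
}

The guard mirrors the one in Concurrency.cpp so the calls are only made when the definitions are actually compiled in.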
+ +#ifdef _OPENMP +#include +#endif + +#ifdef MULTITHREAD_SUPPORT + #include "ThreadPool.h" + #define MULTITHREAD_INTERFACE + #define MULTITHREAD_ENTITY_CALL_MUTEX +#endif + +#ifndef NO_REENTRANCY_LOCKS + #define MULTITHREAD_INTERFACE + + #ifndef NO_ENTITY_CALL_MUTEX + #define MULTITHREAD_ENTITY_CALL_MUTEX + #endif +#endif + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) || defined(_OPENMP) + +//system headers: +#include +#include +#include +#include +#include + +namespace Concurrency +{ + //standard mutex for singular access + typedef std::mutex SingleMutex; + + //standard lock for singular access + typedef std::unique_lock SingleLock; + + //standard read-write mutex + typedef std::shared_mutex ReadWriteMutex; + + //standard read lock on a read-write shared mutex + typedef std::shared_lock ReadLock; + + //standard write lock on a read-write shared mutex + typedef std::unique_lock WriteLock; + + //vector of standard read locks + typedef std::vector ReadLocksBuffer; + + //vector of standard write locks + typedef std::vector WriteLocksBuffer; + + //Object to perform scope-based unlocking of a vector of locks of LockType for an existing buffer + template + class MultipleLockBufferObject + { + public: + inline MultipleLockBufferObject(LockBufferType &_buffer) + { + buffer = &_buffer; + } + + inline ~MultipleLockBufferObject() + { + buffer->clear(); + } + + LockBufferType *buffer; + }; + + size_t GetMaxNumThreads(); + + //sets the maximum number of threads to use + // if zero is specified, then it uses a heuristic default based on the system + void SetMaxNumThreads(size_t max_num_threads); + +#ifdef MULTITHREAD_SUPPORT + extern ThreadPool threadPool; +#endif +}; +#endif diff --git a/src/Amalgam/Conviction.h b/src/Amalgam/Conviction.h new file mode 100644 index 00000000..a21cbfa1 --- /dev/null +++ b/src/Amalgam/Conviction.h @@ -0,0 +1,424 @@ +#pragma once + +//project headers: +#include "ConvictionUtil.h" +#include "EntityQueriesStatistics.h" +#include "KnnCache.h" +#include "SeparableBoxFilterDataStore.h" + +//system headers: +#include + +//manages all types of processing related to conviction +template +class ConvictionProcessor +{ +public: + //buffers to be reused for less memory churn + struct ConvictionProcessorBuffers + { + std::vector> neighbors; + std::vector> updatedDistanceContribs; + std::vector baseDistanceContributions; + std::vector baseDistanceProbabilities; + }; + +#ifdef MULTITHREAD_SUPPORT + ConvictionProcessor(ConvictionProcessorBuffers &_buffers, KnnCache &cache, + EntityQueriesStatistics::DistanceTransform &distance_transform, size_t num_nearest_neighbors, bool run_concurrently) +#else + ConvictionProcessor(ConvictionProcessorBuffers &_buffers, KnnCache &cache, + EntityQueriesStatistics::DistanceTransform &distance_transform, size_t num_nearest_neighbors) +#endif + { + buffers = &_buffers; + knnCache = &cache; + distanceTransform = &distance_transform; + numNearestNeighbors = num_nearest_neighbors; + +#ifdef MULTITHREAD_SUPPORT + runConcurrently = run_concurrently; +#endif + } + + //Computes distance contribution for entity_reference + // if additional_holdout_reference is specified, then it will ignore that case + inline double ComputeDistanceContribution(EntityReference entity_reference, + EntityReference additional_holdout_reference = DistanceReferencePair::InvalidReference()) + { + //fetch the knn results from the cache + buffers->neighbors.clear(); + knnCache->GetKnn(entity_reference, numNearestNeighbors, buffers->neighbors, 
additional_holdout_reference); + + return distanceTransform->ComputeDistanceContribution(buffers->neighbors, entity_reference); + } + + //Like the other ComputeDistanceContribution, but only includes included_entities + inline double ComputeDistanceContribution(EntityReference entity_reference, EntityReferenceSet &included_entities) + { + //fetch the knn results from the cache + buffers->neighbors.clear(); + knnCache->GetKnn(entity_reference, numNearestNeighbors, buffers->neighbors, included_entities); + + return distanceTransform->ComputeDistanceContribution(buffers->neighbors, entity_reference); + } + + //Computes the Distance Contributions for each entity specified in entities_to_compute + //if entities_to_compute is specified (i.e., not nullptr), any entity not in entities_to_compute will be ommitted + //sets contribs_out to the respective distance contributions of each entity in entities_to_compute, or, for all entities in the cache + //sets contribs_sum_out to the sum of all distance contributions of entities in entities_to_compute and also, if not nullptr, in the cache. + inline void ComputeDistanceContributions(EntityReferenceSet *entities_to_compute, std::vector &contribs_out, double &contribs_sum_out) + { + buffers->neighbors.reserve(numNearestNeighbors + 1); + contribs_sum_out = 0.0; + + if(entities_to_compute == nullptr) + entities_to_compute = knnCache->GetRelevantEntities(); + + //compute distance contribution for each entity in entities_to_compute + contribs_out.resize(entities_to_compute->size()); + size_t out_index = 0; + for(auto entity_reference : *entities_to_compute) + { + double contrib = ComputeDistanceContribution(entity_reference); + + //push back distance contribution, add sub sum to global sum + contribs_sum_out += contrib; + contribs_out[out_index++] = contrib; + } + } + + //like ComputeDistanceContributions, but doesn't use contribs_sum_out and will run in parallel if applicable + inline void ComputeDistanceContributions(EntityReferenceSet *entities_to_compute, std::vector &contribs_out) + { + #ifdef MULTITHREAD_SUPPORT + //only cache concurrently if computing for all entities + if(runConcurrently && (entities_to_compute == nullptr || entities_to_compute->size() == knnCache->GetNumRelevantEntities())) + knnCache->PreCacheAllKnn(numNearestNeighbors, true); + #endif + + double contribs_sum_out = 0.0; + ComputeDistanceContributions(entities_to_compute, contribs_out, contribs_sum_out); + } + + //Like ComputeDistanceContributions, but will populate contribs_out with a value for each of the included_entities, and will set any releavant entity + // in the cache but only in included_entities to excluded_entity_distance_contribution_value, which will not be included in the contribs_sum_out + inline void ComputeDistanceContributionsFromEntities(EntityReferenceSet &included_entities, double excluded_entity_distance_contribution_value, + std::vector &contribs_out, double &contribs_sum_out) + { + buffers->neighbors.reserve(numNearestNeighbors + 1); + contribs_sum_out = 0.0; + + //compute distance contribution for each entity in entities_to_compute + contribs_out.resize(knnCache->GetNumRelevantEntities()); + size_t out_index = 0; + for(auto entity_reference : *knnCache->GetRelevantEntities()) + { + //skip entities not specified in included_entities, instead store the expected probability value of 1/n + if(!included_entities.contains(entity_reference)) + { + contribs_out[out_index++] = excluded_entity_distance_contribution_value; + //continue to the next entity without 
updating the contributions sum + continue; + } + + double contrib = ComputeDistanceContribution(entity_reference, included_entities); + + //push back distance contribution, add sub sum to global sum + contribs_sum_out += contrib; + contribs_out[out_index++] = contrib; + } + } + + //Computes the distance contributions for each relevant entity in the cache with the entity at holdout_entity removed from the model + //populates updated_contribs_out only with the set of entities that have different distance contributions than their base contributions + //sets updated_contribs_sum_out to the sum of all distance contributions of entities + inline void UpdateDistanceContributionsWithHoldout(EntityReference holdout_entity, double holdout_replacement_value, + const std::vector &dist_contribs, const double base_dist_contrib_sum, + std::vector> &updated_contribs_out, double &updated_contribs_sum_out) + { + updated_contribs_sum_out = base_dist_contrib_sum; + + int64_t distance_contribs_index = -1; + updated_contribs_out.reserve(knnCache->GetNumRelevantEntities()); + for(auto entity_reference : *knnCache->GetRelevantEntities()) + { + distance_contribs_index++; + + //if holdout reference, replace with expected value + if(entity_reference == holdout_entity) + { + //remove the old distance contribution from the sum + updated_contribs_sum_out -= dist_contribs[distance_contribs_index]; + + //write out whatever the replacement value is for this element + updated_contribs_out.push_back(DistanceReferencePair(holdout_replacement_value, distance_contribs_index)); + continue; + } + + //if the nearest neighbors don't include holdout_entity, the distance contribution will be unchanged + if(!knnCache->DoesCachedKnnContainEntity(entity_reference, holdout_entity, numNearestNeighbors)) + continue; + + double distance_contribution = ComputeDistanceContribution(entity_reference, holdout_entity); + + //reduce later workload by culling data here - do not mark dc as scaled/different if it actually isn't + if(dist_contribs[distance_contribs_index] == distance_contribution) + continue; + + //"replace" the distance contribution in the sum by subtracting the base contribution and adding the scaled contribution + updated_contribs_sum_out -= dist_contribs[distance_contribs_index]; + updated_contribs_sum_out += distance_contribution; + + //add scaled distance contribution for this element to output + updated_contribs_out.push_back(DistanceReferencePair(distance_contribution, distance_contribs_index)); + } + + //if all the distance contributions are zero, (which can happen if all the cases have mismatching nan values and can't be compared to each other in sparse datasets) + //set the distance contributions to be 1/n for all of them since they are all the 'same' + if(updated_contribs_sum_out == 0.0) + { + double avg_dc = 1.0 / (knnCache->GetNumRelevantEntities()); + for(size_t i = 0; i < updated_contribs_out.size(); i++) + updated_contribs_out[i].distance = avg_dc; + + updated_contribs_sum_out = updated_contribs_out.size() * avg_dc; + } + } + + //populates probabilities_out with the distance probabilities given the distance contributions + static inline void ConvertDistanceContributionsToProbabilities(const std::vector &contributions, const double contribution_sum, std::vector &probabilities_out) + { + probabilities_out.reserve(contributions.size()); + if(contribution_sum != 0) + { + for(const double &contrib : contributions) + probabilities_out.push_back(contrib / contribution_sum); + } + else //if contrib_sum == 0, then each 
contrib must be 0 + { + probabilities_out.resize(contributions.size(), 0.0); + } + } + + //Computes the case KL divergence or conviction for each case in entities_to_compute + //if normalize_convictions is false, it will return the kl divergences, if true, it will return the convictions + //if conviction_of_removal is true, then it will compute the conviction as if the entities not in base_group_entities were removed, + // if false, then will compute the conviction as if those entities were added or included + inline void ComputeCaseKLDivergences(EntityReferenceSet &entities_to_compute, std::vector &convictions_out, bool normalize_convictions, bool conviction_of_removal) + { + //prime the cache + #ifdef MULTITHREAD_SUPPORT + knnCache->PreCacheAllKnn(numNearestNeighbors + 1, runConcurrently); + #else + knnCache->PreCacheAllKnn(numNearestNeighbors + 1); + #endif + + //find base distance contributions + double contrib_sum = 0.0; + buffers->baseDistanceContributions.clear(); + ComputeDistanceContributions(nullptr, buffers->baseDistanceContributions, contrib_sum); + + //convert base distance contributions to probabilities + buffers->baseDistanceProbabilities.clear(); + ConvertDistanceContributionsToProbabilities(buffers->baseDistanceContributions, contrib_sum, buffers->baseDistanceProbabilities); + + //cache constants for expected values + const size_t num_relevant_entities = knnCache->GetNumRelevantEntities(); + const double probability_mass_of_non_holdouts = (1.0 - 1.0 / num_relevant_entities); + // the reciprocal of the ratio of num cases without to num cases with times the contrib_sum; cached for scaling below + // using the reciprocal here (instead of the more intuitive flip) saves a negation in the loop + const double updated_contrib_to_contrib_scale_inverse = num_relevant_entities / (contrib_sum * (num_relevant_entities - 1)); + + //for measuring kl divergence, only need to measure those entities that have a value that is different + auto &updated_distance_contribs = buffers->updatedDistanceContribs; + convictions_out.clear(); + convictions_out.reserve(num_relevant_entities); + double kl_sum = 0.0; + bool has_zero_kl = false; //flag will be set to true if there are any convictions that are 0, used later to prevent division by 0 + + //compute the scaled distance contributions and sums when any 1 case is removed from the model + //note that the kl_divergence for every non-scaled set is 0, so the sum will not change except for when a case is actually removed from the model + size_t distance_contribution_index = 0; + for(auto entity_reference : entities_to_compute) + { + //compute distance contributions of the entities whose dcs will be changed by the removal of entity_reference + updated_distance_contribs.clear(); + double updated_contrib_sum = 0.0; + buffers->neighbors.clear(); + UpdateDistanceContributionsWithHoldout(entity_reference, 1.0 / num_relevant_entities, buffers->baseDistanceContributions, contrib_sum, + buffers->updatedDistanceContribs, updated_contrib_sum); + + //convert updated_distance_contribs to probabilities + //convert via the updated contribution sum and multiply by the probability mass of everything that isn't the holdout + //multiplying a non-held out distance contribution by this value converts it into a probability + double updated_dc_to_probability = probability_mass_of_non_holdouts / updated_contrib_sum; + + //convert updated distance contribution into a probability as appropriate + for(auto &dc : updated_distance_contribs) + { + //the knockout case was already 
already assigned the probability + if(dc.reference != distance_contribution_index) + dc.distance *= updated_dc_to_probability; + } + + //compute KL divergence for the values which have different neighbor lists + + //need to compute the KL divergence for the cases that don't have different neighbor lists but are only scaled + //for conviction_of_removal, this can be computed as + //d_KL = sum_i -base_distance_probabilities[i] * log( base_distance_probabilities[i] / new_probabilities[i]) + //but because we know new_probabilities[i] = base_distance_probabilities[i] * dc_update_scale we can rewrite this as: + // d_KL = sum_i -base_distance_probabilities[i] * log( 1 / dc_update_scale) ) + //the logarithm doesn't change and can be pulled out of the sum (pulling out the reciprocol as -1) to be: + // d_KL = log( dc_update_scale) ) * sum_i base_distance_probabilities[i] + //but because we've already computed the kl divergence for the updated_distance_contribs (changed neighbor sets), + // we only want to compute d_KL for those that just need to be scaled + //for the opposite, the conviction of adding the case, we just flip p and q in the kl divergence: + // d_KL = sum_i -new_probabilities[i] * log( new_probabilities[i] / base_distance_probabilities[i] ) + //thus (note the negative sign due to the reciprocal of dc_update_scale): + // d_KL = sum_i -new_probabilities[i] * log( dc_update_scale) ) + double dc_update_scale = updated_contrib_sum * updated_contrib_to_contrib_scale_inverse; + + double kld_updated; + double kld_scaled; + if(conviction_of_removal) + { + kld_updated = PartialKullbackLeiblerDivergenceFromIndices(buffers->baseDistanceProbabilities, updated_distance_contribs); + + //need to find unchanged distance contribution relative to the total in order to find the total probability mass + double total_distance_contribution_unchanged = contrib_sum; + for(auto &dc : updated_distance_contribs) + total_distance_contribution_unchanged -= buffers->baseDistanceContributions[dc.reference]; + + double total_probability_mass_changed = (total_distance_contribution_unchanged / contrib_sum); + + kld_scaled = total_probability_mass_changed * std::log(dc_update_scale); + } + else + { + kld_updated = PartialKullbackLeiblerDivergenceFromIndices(updated_distance_contribs, buffers->baseDistanceProbabilities); + + //since the updated distance contribs have already been converted to probabilities, can just use them directly + double total_updated_probability_mass_changed = 1.0; + for(auto &dc : updated_distance_contribs) + total_updated_probability_mass_changed -= dc.distance; + + //negative sign due to the reciprocal of dc_update_scale + kld_scaled = -total_updated_probability_mass_changed * std::log(dc_update_scale); + } + + double kld_total = kld_updated + kld_scaled; + + //can't be negative, so clamp to zero + if(kld_total >= 0.0) + kl_sum += kld_total; + else + { + kld_total = 0.0; + has_zero_kl = true; + } + + convictions_out.push_back(kld_total); + distance_contribution_index++; + } + + //average + const double kl_avg = kl_sum / convictions_out.size(); + + //compute convictions + if(kl_avg == 0.0) //if avg is zero, every conviction becomes one (even 0/0 as the kl denominator dominates) + { + convictions_out.clear(); + convictions_out.resize(entities_to_compute.size(), 1.0); + } + else if(normalize_convictions) + { + if(has_zero_kl) + { + for(auto &kl : convictions_out) + { + if(kl != 0.0) + kl = (kl_avg / kl); + } + } + else + { + for(auto &kl : convictions_out) + kl = (kl_avg / kl); + } + } + } + + 
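For clarity, the following standalone sketch (not part of the diff) restates the final normalization step of ComputeCaseKLDivergences: each case's conviction is the average KL divergence across all cases divided by that case's own divergence, with the same guards as above, where a zero average makes every conviction one and a zero divergence is left as-is to avoid dividing by zero.

#include <numeric>
#include <vector>

//standalone illustrative sketch, not code from the repository
std::vector<double> KlDivergencesToConvictions(std::vector<double> kl_divergences)
{
	double kl_sum = std::accumulate(begin(kl_divergences), end(kl_divergences), 0.0);
	double kl_avg = kl_sum / kl_divergences.size();

	//if the average divergence is zero, every conviction is defined to be one
	if(kl_avg == 0.0)
		return std::vector<double>(kl_divergences.size(), 1.0);

	//conviction is the ratio of the average divergence to the case's own divergence;
	// a zero divergence is left unchanged to avoid division by zero
	for(auto &kl : kl_divergences)
	{
		if(kl != 0.0)
			kl = kl_avg / kl;
	}

	return kl_divergences;
}

Dividing the average by the per-case divergence means surprising cases (large divergence) receive convictions below one, while typical cases receive convictions above one.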
//Computes the KL divergence for adding a group of cases (a new model) to an existing model. + //This assumes that the current model is the combined model that already has the + //new cases in it with the indices of those original cases specified as base_group_entities + //if conviction_of_removal is true, then it will compute the conviction as if the entities not in base_group_entities were removed, + // if false, then will compute the conviction as if those entities were added or included + inline double ComputeCaseGroupKLDivergence(EntityReferenceSet &base_group_entities, bool conviction_of_removal) + { + //prime cache; get double the number of numNearestNeighbors in attempt to reduce the number of queries needed + // other heuristics other than 2x may be considered, and the effectiness of the heuristic entirely will depend on the overlap between the two case groups + #ifdef MULTITHREAD_SUPPORT + knnCache->PreCacheAllKnn(numNearestNeighbors * 2, runConcurrently); + #else + knnCache->PreCacheAllKnn(numNearestNeighbors * 2); + #endif + + //compute the resulting combined model distance contributions (reuse buffer) + std::vector &combined_model_distance_contribs = buffers->baseDistanceContributions; + combined_model_distance_contribs.clear(); + double contrib_sum = 0.0; + ComputeDistanceContributions(nullptr, combined_model_distance_contribs, contrib_sum); + + //compute scaled distance contributions of only the base model (only from base_group_entities) (reuse buffer) + std::vector &scaled_base_distance_contribs = buffers->baseDistanceProbabilities; + scaled_base_distance_contribs.clear(); + double scaled_base_contrib_sum; + + //compute scaled distance contributions for the base cases in but setting the remaining entities to the probability of 1/n + ComputeDistanceContributionsFromEntities(base_group_entities, 1.0 / knnCache->GetNumRelevantEntities(), + scaled_base_distance_contribs, scaled_base_contrib_sum); + + //normalize the combined model distance contributions to convert them into probabilities + double base_scalar = 1.0 / contrib_sum; + for(auto &c : combined_model_distance_contribs) + c *= base_scalar; + + //normalize each scaled distance contribution to convert them into probabilities + double prob_scalar = static_cast(base_group_entities.size()) / knnCache->GetNumRelevantEntities(); + prob_scalar /= scaled_base_contrib_sum; + + //for each element that doesn't belong to base_group_entities, scale the probabilities so that the sum is 1.0 + // while leaving existing cases not in base_group_entities as previously set to the proper probability + size_t distance_contribution_index = 0; + for(auto entity_reference : *knnCache->GetRelevantEntities()) + { + if(base_group_entities.contains(entity_reference)) + scaled_base_distance_contribs[distance_contribution_index] *= prob_scalar; + + distance_contribution_index++; + } + + //compute KL divergence + if(conviction_of_removal) + return KullbackLeiblerDivergence(combined_model_distance_contribs, scaled_base_distance_contribs); + else + return KullbackLeiblerDivergence(scaled_base_distance_contribs, combined_model_distance_contribs); + } + + protected: + + KnnCache *knnCache; + EntityQueriesStatistics::DistanceTransform *distanceTransform; + + //number of nearest neighbors + size_t numNearestNeighbors; + + //reusable memory buffers + ConvictionProcessorBuffers *buffers; + + #ifdef MULTITHREAD_SUPPORT + //if true, attempt to run with concurrency + bool runConcurrently; + #endif +}; diff --git a/src/Amalgam/ConvictionUtil.h 
b/src/Amalgam/ConvictionUtil.h new file mode 100644 index 00000000..4a1889b3 --- /dev/null +++ b/src/Amalgam/ConvictionUtil.h @@ -0,0 +1,53 @@ +#pragma once + +//project headers: +#include "DistanceReferencePair.h" +#include "FastMath.h" + +//system headers: +#include +#include + +//KL(P||Q) = Sum(p(i) * log( p(i) / q(i) ), natural base +inline double KullbackLeiblerDivergence(const std::vector &p, const std::vector &q) +{ + double sum = 0.0; + for(size_t i = 0; i < p.size(); i++) + { + if(q[i] != 0 && !FastIsNaN(q[i])) + sum += p[i] ? p[i] * std::log(p[i] / q[i]) : 0; + } + return sum; +} + +//computes the KL divergence between p and q.distance only for features specified by the indices given by q.reference +//i.e. this will give equivelent value if calling normal KL on p and q if p and q are the same value at indices oother than those in q.reference +//note that there are two versions of this function with the DistanceReferencePair parameters flipped +inline double PartialKullbackLeiblerDivergenceFromIndices(const std::vector &p, const std::vector> &q) +{ + double sum = 0.0; + for(const auto &changed_contrib : q) + { + const double q_i = changed_contrib.distance; + const double p_i = p[changed_contrib.reference]; + if(q_i != 0 && !FastIsNaN(q_i)) + sum += p_i ? p_i * std::log(p_i / q_i) : 0; + } + return sum; +} + +//computes the KL divergence between p.distance and q only for features specified by the indices given by p.reference +//i.e. this will give equivelent value if calling normal KL on p and q if p and q are the same value at indices other than those in p.reference +//note that there are two versions of this function with the DistanceReferencePair parameters flipped +inline double PartialKullbackLeiblerDivergenceFromIndices(const std::vector> &p, const std::vector &q) +{ + double sum = 0.0; + for(const auto &changed_contrib : p) + { + const double p_i = changed_contrib.distance; + const double q_i = q[changed_contrib.reference]; + if(q_i != 0 && !FastIsNaN(q_i)) + sum += p_i ? 
p_i * std::log(p_i / q_i) : 0; + } + return sum; +} diff --git a/src/Amalgam/Cryptography.cpp b/src/Amalgam/Cryptography.cpp new file mode 100644 index 00000000..0bb498eb --- /dev/null +++ b/src/Amalgam/Cryptography.cpp @@ -0,0 +1,193 @@ +//project headers: +#include "Cryptography.h" + +//3rd party headers: +extern "C" { +#include "tweetnacl/tweetnacl.h" +} + +//fills destination with length bytes of random data +//need to extern "C" because the tweetnacl.c library expects it +extern "C" void randombytes(unsigned char *destination, unsigned long long length) +{ + Platform_GenerateSecureRandomData(destination, length); +} + +std::pair GenerateSignatureKeyPair() +{ + unsigned char pk[crypto_sign_PUBLICKEYBYTES]; + unsigned char sk[crypto_sign_SECRETKEYBYTES]; + crypto_sign_keypair(&pk[0], &sk[0]); + + std::string pk_s(reinterpret_cast(&pk[0]), crypto_sign_PUBLICKEYBYTES); + std::string sk_s(reinterpret_cast(&sk[0]), crypto_sign_SECRETKEYBYTES); + + return std::make_pair(pk_s, sk_s); +} + +std::pair GenerateEncryptionKeyPair() +{ + unsigned char pk[crypto_box_PUBLICKEYBYTES]; + unsigned char sk[crypto_box_SECRETKEYBYTES]; + crypto_box_keypair(&pk[0], &sk[0]); + + std::string pk_s(reinterpret_cast(&pk[0]), crypto_box_PUBLICKEYBYTES); + std::string sk_s(reinterpret_cast(&sk[0]), crypto_box_SECRETKEYBYTES); + + return std::make_pair(pk_s, sk_s); +} + +std::string SignMessage(std::string &message, std::string &secret_key) +{ + if(secret_key.size() != crypto_sign_SECRETKEYBYTES) + return ""; + + std::string signed_message(crypto_sign_BYTES + message.size(), '\0'); + //use the same type from tweetnacl + unsigned long long signed_message_len = 0; + + crypto_sign(reinterpret_cast(&signed_message[0]), + &signed_message_len, + reinterpret_cast(&message[0]), message.size(), + reinterpret_cast(&secret_key[0])); + + //extract just the signature part + std::string signature(begin(signed_message), begin(signed_message) + crypto_sign_BYTES); + return signature; +} + +bool IsSignatureValid(std::string &message, std::string &public_key, std::string &signature) +{ + if(public_key.size() != crypto_sign_PUBLICKEYBYTES) + return false; + + if(signature.size() != crypto_sign_BYTES) + return false; + + //prepend the signature + std::string signed_message = signature + message; + + //crypto_sign_open needs the full space of the signed message as its working space + std::string original_message_buffer(crypto_sign_BYTES + message.size(), '\0'); + //variable to recieve the populated length (not used but crypto_sign_open needs it) + //use the same type from tweetnacl + unsigned long long original_message_buffer_len = 0; + + if(crypto_sign_open( + reinterpret_cast(&original_message_buffer[0]), &original_message_buffer_len, + reinterpret_cast(&signed_message[0]), signed_message.size(), + reinterpret_cast(&public_key[0])) != 0) + { + return false; + } + + return true; +} + +std::string EncryptMessage(std::string &plaintext, std::string &secret_key, std::string &nonce) +{ + if(secret_key.size() != crypto_secretbox_KEYBYTES) + return ""; + + if(nonce.size() != crypto_secretbox_NONCEBYTES) + nonce.resize(crypto_secretbox_NONCEBYTES, '\0'); + + size_t total_len = plaintext.size() + crypto_secretbox_ZEROBYTES; + std::vector message_buffer(total_len, 0); + for(size_t i = crypto_secretbox_ZEROBYTES; i < total_len; i++) + message_buffer[i] = plaintext[i - crypto_secretbox_ZEROBYTES]; + + std::string cypher_buffer(total_len, 0); + crypto_secretbox(reinterpret_cast(&cypher_buffer[0]), + &message_buffer[0], total_len, + 
reinterpret_cast(&nonce[0]), + reinterpret_cast(&secret_key[0])); + + cypher_buffer.erase(begin(cypher_buffer), begin(cypher_buffer) + crypto_secretbox_BOXZEROBYTES); + return cypher_buffer; +} + +std::string DecryptMessage(std::string &cyphertext, std::string &secret_key, std::string &nonce) +{ + if(secret_key.size() != crypto_secretbox_KEYBYTES) + return ""; + + if(nonce.size() != crypto_secretbox_NONCEBYTES) + nonce.resize(crypto_secretbox_NONCEBYTES, '\0'); + + size_t total_len = cyphertext.size() + crypto_secretbox_BOXZEROBYTES; + std::vector message_buffer(total_len, 0); + for(size_t i = crypto_secretbox_BOXZEROBYTES; i < total_len; i++) + message_buffer[i] = cyphertext[i - crypto_secretbox_BOXZEROBYTES]; + + std::string plaintext_buffer(total_len, 0); + if(crypto_secretbox_open(reinterpret_cast(&plaintext_buffer[0]), + &message_buffer[0], total_len, + reinterpret_cast(&nonce[0]), + reinterpret_cast(&secret_key[0]))) + { + return ""; + } + + plaintext_buffer.erase(begin(plaintext_buffer), begin(plaintext_buffer) + crypto_secretbox_ZEROBYTES); + return plaintext_buffer; +} + +std::string EncryptMessage(std::string &plaintext, + std::string &receiver_public_key, std::string &sender_secret_key, std::string &nonce) +{ + if(receiver_public_key.size() != crypto_box_PUBLICKEYBYTES) + return ""; + + if(sender_secret_key.size() != crypto_box_SECRETKEYBYTES) + return ""; + + if(nonce.size() != crypto_box_NONCEBYTES) + nonce.resize(crypto_box_NONCEBYTES, '\0'); + + size_t total_len = plaintext.size() + crypto_box_ZEROBYTES; + std::vector message_buffer(total_len, 0); + for(size_t i = crypto_box_ZEROBYTES; i < total_len; i++) + message_buffer[i] = plaintext[i - crypto_box_ZEROBYTES]; + + std::string cypher_buffer(total_len, 0); + crypto_box(reinterpret_cast(&cypher_buffer[0]), + &message_buffer[0], total_len, + reinterpret_cast(&nonce[0]), + reinterpret_cast(&receiver_public_key[0]), + reinterpret_cast(&sender_secret_key[0])); + + cypher_buffer.erase(begin(cypher_buffer), begin(cypher_buffer) + crypto_box_BOXZEROBYTES); + return cypher_buffer; +} + +std::string DecryptMessage(std::string &cyphertext, + std::string &sender_public_key, std::string &receiver_secret_key, std::string &nonce) +{ + if(sender_public_key.size() != crypto_box_PUBLICKEYBYTES) + return ""; + + if(receiver_secret_key.size() != crypto_box_SECRETKEYBYTES) + return ""; + + if(nonce.size() != crypto_box_NONCEBYTES) + nonce.resize(crypto_box_NONCEBYTES, '\0'); + + size_t total_len = cyphertext.size() + crypto_box_BOXZEROBYTES; + std::vector message_buffer(total_len, 0); + for(size_t i = crypto_box_BOXZEROBYTES; i < total_len; i++) + message_buffer[i] = cyphertext[i - crypto_box_BOXZEROBYTES]; + + std::string plaintext_buffer(total_len, 0); + if(crypto_box_open(reinterpret_cast(&plaintext_buffer[0]), + &message_buffer[0], total_len, + reinterpret_cast(&nonce[0]), + reinterpret_cast(&sender_public_key[0]), + reinterpret_cast(&receiver_secret_key[0]))) + { + return ""; + } + + plaintext_buffer.erase(begin(plaintext_buffer), begin(plaintext_buffer) + crypto_box_ZEROBYTES); + return plaintext_buffer; +} diff --git a/src/Amalgam/Cryptography.h b/src/Amalgam/Cryptography.h new file mode 100644 index 00000000..79dbfe7c --- /dev/null +++ b/src/Amalgam/Cryptography.h @@ -0,0 +1,37 @@ +#pragma once + +//project headers: +#include "PlatformSpecific.h" + +//system headers: +#include + +//generates a public and secret key for signing, returned in that order +std::pair GenerateSignatureKeyPair(); + +//generates a public and secret key for 
encryption, returned in that order +std::pair GenerateEncryptionKeyPair(); + +//returns the signature for the given message and secret_key +std::string SignMessage(std::string &message, std::string &secret_key); + +//returns true if the signature is valid for the message given the public key +bool IsSignatureValid(std::string &message, std::string &public_key, std::string &signature); + +//returns an encrypted form of the message plaintext secret key and nonce (of up to 24 bytes) +// nonce will be resized and padded with 0s if not the right size +std::string EncryptMessage(std::string &plaintext, std::string &secret_key, std::string &nonce); + +//returns an decrypted form of the message cyphertext given secret key and nonce (of up to 24 bytes) +// nonce will be resized and padded with 0s if not the right size +std::string DecryptMessage(std::string &cyphertext, std::string &secret_key, std::string &nonce); + +//returns an encrypted form of the message plaintext given public and secret keys and nonce (of up to 24 bytes) +// nonce will be resized and padded with 0s if not the right size +std::string EncryptMessage(std::string &plaintext, + std::string &receiver_public_key, std::string &sender_secret_key, std::string &nonce); + +//returns an decrypted form of the message cyphertext given public and secret keys and nonce (of up to 24 bytes) +// nonce will be resized and padded with 0s if not the right size +std::string DecryptMessage(std::string &cyphertext, + std::string &sender_public_key, std::string &receiver_secret_key, std::string &nonce); diff --git a/src/Amalgam/DateTimeFormat.cpp b/src/Amalgam/DateTimeFormat.cpp new file mode 100644 index 00000000..f9b2b4c3 --- /dev/null +++ b/src/Amalgam/DateTimeFormat.cpp @@ -0,0 +1,350 @@ +//project headers: +#include "DateTimeFormat.h" + +#include "PlatformSpecific.h" + +//3rd party headers: +#include "date/date.h" +#include "date/tz.h" + +std::string SetTimeZoneDatabasePath(std::string path) +{ + //only do this on Windows -- other operating systems use the built-in timezone database +#ifdef OS_WINDOWS + if(path.empty()) + { + std::string error; + if(Platform_IsResourcePathAccessible("./tzdata", true, error)) + { + path = "./tzdata"; + } + else if(Platform_IsResourcePathAccessible(Platform_GetHomeDirectory() + "/.amalgam/tzdata", true, error)) + { + path = Platform_GetHomeDirectory() + "/.amalgam/tzdata"; + } + else if(Platform_IsResourcePathAccessible(Platform_GetHomeDirectory() + "/.howso/tzdata", true, error)) + { + path = Platform_GetHomeDirectory() + "/.howso/tzdata"; + } + else + { + std::cerr << "Warning: Could not find time zone database (tzdata directory) in: current working directory, ~/.amalgam, or ~/.howso.\n"; + std::cerr << "Instead, all dates will use UTC.\n"; + std::cerr << "The tzdata directory should contain the files from https://www.iana.org/time-zones and https://github.com/unicode-org/cldr/blob/master/common/supplemental/windowsZones.xml .\n"; + } + } + + if(!path.empty()) + date::set_install(path); +#endif + + return path; +} + +// return true if format is year-month or month-year, where year is %Y and month is one of %m, %b, %B or %h, with any single character separator +inline bool IsFormatMonthAndYearOnly(const std::string &s) +{ + //expected formats should be length of 5, e.g., "%m-%Y" + if(s.size() != 5) + return false; + + if(s[1] == 'Y' && (s[4] == 'm' || s[4] == 'b' || s[4] == 'B' || s[4] == 'h')) + return true; + + if(s[4] == 'Y' && (s[1] == 'm' || s[1] == 'b' || s[1] == 'B' || s[1] == 'h')) + return true; + + 
return false; +} + +//edits s in place to make sure that it has only valid format specifiers (%'s as specified by the strftime standards) +// if a format specifier is invalid, it will eliminate it and preserve the underlying characters +// returns true if the format string contains a %z offset +inline bool ConstrainDateTimeStringToValidFormat(std::string &s) +{ + bool has_time_offset = false; + + for(size_t index = 0; index < s.size(); index++) + { + //skip over non-specifier characters + if(s[index] != '%') + continue; + + //specifiers need to have at least one character afterward + if(index + 1 >= s.size()) + { + //get rid of the % character and finish + s.pop_back(); + break; + } + + switch(s[index + 1]) + { + //valid single-caracter format specifiers + case 'z': + { + index++; + has_time_offset = true; + continue; + } + + case '%': case 'a': case 'A': case 'b': case 'B': case 'C': case 'c': case 'd': case 'D': case 'e': case 'F': case 'G': + case 'g': case 'h': case 'H': case 'I': case 'j': case 'm': case 'M': case 'n': case 'p': case 'r': case 'R': case 'S': + case 't': case 'T': case 'U': case 'u': case 'V': case 'W': case 'w': case 'x': case 'X': case 'Y': case 'y': case 'Z': + { + index++; + continue; + } + + //valid double-character format specifiers + case 'E': + { + if(index + 2 >= s.size()) + { + //remove all remaining including the format specifier + s.erase(begin(s) + index, end(s)); + continue; + } + + switch(s[index + 2]) + { + case 'C': case 'c': case 'x': case 'X': case 'Y': case 'y': + { + index += 2; + continue; + } + default: + break; + } + + break; + } + + case 'O': + { + if(index + 2 >= s.size()) + { + //remove all remaining including the format specifier + s.erase(begin(s) + index, end(s)); + continue; + } + + switch(s[index + 2]) + { + case 'd': case 'e': case 'H': case 'I': case 'm': case 'M': case 'S': + case 'U': case 'u': case 'V': case 'W': case 'w': case 'y': + { + index += 2; + continue; + } + default: + break; + } + + break; + } + + //invalid format specifier, replace % with a space + default: + s[index] = ' '; + break; + } + + } + + return has_time_offset; +} + + +//finds a vector of time zones corresponding to the abbreviation +// from https://github.com/HowardHinnant/date/wiki/Examples-and-Recipes#convert_by_timezone_abbreviation +template +std::vector>> FindZonesByAbbrev(date::sys_time tp, const std::string &abbrev) +{ + using namespace std::chrono; + using namespace date; + std::vector>> results; + auto &db = get_tzdb(); + for(auto &z : db.zones) + { + if(z.get_info(tp).abbrev == abbrev) + results.emplace_back(&z, tp); + } + return results; +} + +//returns the time_zone corresponding with the string timezone +// if timezone is an abbreviation, it will only select a timezone if it is a unique timezone corresponding to the abbreviation +const date::time_zone *GetTimeZoneFromString(const std::string &timezone) +{ + // if timezone wasn't specified, return local timezone + if(timezone.empty()) + return date::current_zone(); + + const date::time_zone *tz; + try + { + tz = date::locate_zone(timezone); + } + catch(...) 
+ { + // search DB of timezones to find one that may match this abbreviation - note the first time this is run it may take several seconds + // pass in 'now' as the timestamp, we only need the list of possible matching timezones, timestamp is not relevant + auto now = std::chrono::system_clock::now(); + auto results = FindZonesByAbbrev(now, timezone); + + //only use the time zone if there is a unique corresponding timezone + if(results.size() == 1) + tz = results[0].get_time_zone(); + else //if can't find anything or ambiguous, so use current timezone + tz = date::current_zone(); + } + + return tz; +} + + +//don't pass locale by reference so can default it +double GetNumSecondsSinceEpochFromDateTimeString(const std::string &datetime, std::string format, std::string locale, std::string timezone) +{ + bool has_time_offset = ConstrainDateTimeStringToValidFormat(format); + + std::chrono::system_clock::time_point dt; + std::istringstream ss{ datetime }; + std::string in_date_timezone = ""; + + if(!locale.empty()) + { + //make sure it's utf-8 + locale += ".utf-8"; + //if the locale is valid, use it + try + { + auto cur_locale = std::locale(locale); + ss.imbue(cur_locale); + } + catch(...) + { + } + } + + try + { + if(IsFormatMonthAndYearOnly(format)) + { + //month and year only dates must be parsed specifically into year_month + date::year_month ym; + ss >> date::parse(format, ym, in_date_timezone); + //convert to time_point by specifiying the day to be 1 for the parsed year month + dt = date::sys_days{ ym / 1 }; + } + else + { + //parse string into dt and if there was a timezone in the string, stores that into in_date_timezone + ss >> date::parse(format, dt, in_date_timezone); + } + } + catch(...) + { + } + + //overwrite the passed-in timezone if one was parsed out of the datetime string + if(!in_date_timezone.empty()) + timezone = in_date_timezone; + //if there is no timezone defined, but the format has a time offset provided via %z, assume the offset is UTC + else if(has_time_offset) + timezone = "UTC"; + + const date::time_zone* t_z = GetTimeZoneFromString(timezone); + + // convert parsed date to the specified timezone + auto zoned_datetime = date::make_zoned(t_z, dt); + + // calculate the difference in seconds between UTC and the specified time zone + int64_t diff = std::chrono::duration_cast(zoned_datetime.get_sys_time().time_since_epoch()).count() - + std::chrono::duration_cast(zoned_datetime.get_local_time().time_since_epoch()).count(); + + //add the difference to the parsed date time, resulting in in a datetime that's the specified time at UTC + //eg: if 10:00:00 was passed in without a time zone, and if local zone is EST, it's actually +5 hours, so 15:00:00 UTC. 
+ dt += std::chrono::seconds(diff); + + //output seconds while keeping original precision by casting the value to microseconds and then dividing by 1000000.0 + return std::chrono::duration_cast(dt.time_since_epoch()).count() / 1000000.0; +} + + +//converts a datetime time point into a string specified by format, locale, and time zone t_z +// templated so it will properly cast the TimepointType and round to the appropriate number of digits +// locale is not specified as constant or passed reference because this function may modify the string +// and needs to call the copy constructor +template +std::string ConvertZonedDateTimeToString(TimepointType datetime, const std::string &format, std::string locale, const date::time_zone *tz) +{ + auto zoned_dt = date::make_zoned(tz, datetime); + + std::ostringstream os; + if(locale.empty()) + { + try + { + os << date::format(format, zoned_dt); + } + catch(...) + { + //can't emit anything + } + } + else + { + //make sure it's utf-8 + locale += ".utf-8"; + //if the locale is valid, use it + try + { + auto cur_locale = std::locale(locale); + os << date::format(cur_locale, format, zoned_dt); + } + catch(...) + { + try + { + os << date::format(format, zoned_dt); + } + catch(...) + { + //can't emit anything + } + } + } + + return os.str(); +} + +//format and locale are not passed by reference because both need a copy +std::string GetDateTimeStringFromNumSecondsSinceEpoch(double num_secs_from_epoch, std::string format, const std::string &locale, const std::string &timezone) +{ + bool has_time_offset = ConstrainDateTimeStringToValidFormat(format); + + bool has_fractional_seconds = (num_secs_from_epoch != static_cast(num_secs_from_epoch)); + + std::chrono::system_clock::time_point datetime; + datetime = std::chrono::system_clock::time_point(std::chrono::duration_cast(std::chrono::duration(static_cast(num_secs_from_epoch)))); + + //if there is no timezone defined, but the format has a time offset provided via %z assume the offset is UTC + const date::time_zone *tz = nullptr; + if(timezone.empty() && has_time_offset) + tz = GetTimeZoneFromString("UTC"); + else + tz = GetTimeZoneFromString(timezone); + + //round to the appropriate precision for seconds + if(has_fractional_seconds) + return ConvertZonedDateTimeToString(datetime, format, locale, tz); + else + { + auto rounded_timepoint = std::chrono::floor(datetime); + return ConvertZonedDateTimeToString(rounded_timepoint, format, locale, tz); + } +} + +std::string _time_zone_database_path = SetTimeZoneDatabasePath(); diff --git a/src/Amalgam/DateTimeFormat.h b/src/Amalgam/DateTimeFormat.h new file mode 100644 index 00000000..75247b81 --- /dev/null +++ b/src/Amalgam/DateTimeFormat.h @@ -0,0 +1,16 @@ +#pragma once + +//system headers: +#include + +//sets the path of the timezone database (from http://www.iana.org/time-zones ) +// used for time zones. 
If no path is specified, it will look in default locations +//returns the path used +std::string SetTimeZoneDatabasePath(std::string path = ""); + +//parses datetime based on format and locale and returns the number of seconds from "Epoch" (January 1, 1970) +double GetNumSecondsSinceEpochFromDateTimeString(const std::string &datetime, std::string format, std::string locale, std::string timezone); + +//transforms timepoint into the datetime string specified by format and locale based on "Epoch" (January 1, 1970) +// positive and negative values of num_secs_from_epoc are allowed +std::string GetDateTimeStringFromNumSecondsSinceEpoch(double num_secs_from_epoch, std::string format, const std::string &locale, const std::string &timezone); diff --git a/src/Amalgam/DistanceReferencePair.h b/src/Amalgam/DistanceReferencePair.h new file mode 100644 index 00000000..f835a162 --- /dev/null +++ b/src/Amalgam/DistanceReferencePair.h @@ -0,0 +1,98 @@ +#pragma once + +#include + +//used to manage pairs of distance and a reference +// where operations take place more frequently via distance first, such that cache access is optimized +// the default type is size_t for indices +template +class DistanceReferencePair +{ +public: + constexpr DistanceReferencePair() + : distance(0), reference(0) + { } + + constexpr DistanceReferencePair(double _distance, ReferenceType _reference) + : distance(_distance), reference(_reference) + { } + + constexpr bool operator <(const DistanceReferencePair &drp) const + { + return distance < drp.distance; + } + + constexpr bool operator <=(const DistanceReferencePair &drp) const + { + return distance <= drp.distance; + } + + constexpr bool operator ==(const DistanceReferencePair &drp) const + { + return distance == drp.distance; + } + + constexpr bool SameReference(const DistanceReferencePair &drp) const + { + return reference == drp.reference; + } + + //returns a reference that will always be invalid, that should, for all practical purposes, always return + // false if compared via equality against a valid reference + static constexpr ReferenceType InvalidReference() + { + return static_cast(-1LL); + } + + double distance; + ReferenceType reference; +}; + +//related to distance index pair but for size_t as a computed feature count in addition to distance +template +class CountDistanceReferencePair +{ +public: + constexpr CountDistanceReferencePair(size_t _count, double _distance, ReferenceType _reference) + : count(_count), distance(_distance), reference(_reference) + { } + + //a larger count means more has been computed, and minimum distance should be found + // with the largest number of computed features + constexpr bool operator <(const CountDistanceReferencePair &cdrp) const + { + if(count == cdrp.count) + return distance < cdrp.distance; + return count > cdrp.count; + } + + //a larger count means more has been computed, and minimum distance should be found + // with the largest number of computed features + constexpr bool operator <=(const CountDistanceReferencePair &cdrp) const + { + if(count == cdrp.count) + return distance <= cdrp.distance; + return count >= cdrp.count; + } + + constexpr bool operator ==(const CountDistanceReferencePair &cdrp) const + { + return count == cdrp.count && distance == cdrp.distance; + } + + constexpr bool SameReference(const DistanceReferencePair &drp) const + { + return reference == drp.reference; + } + + //returns a reference that will always be invalid, that should, for all practical purposes, always return + // false if compared via 
equality against a valid reference + static constexpr ReferenceType InvalidReference() + { + return static_cast(-1LL); + } + + size_t count; + double distance; + ReferenceType reference; +}; diff --git a/src/Amalgam/FastEMath.h b/src/Amalgam/FastEMath.h new file mode 100644 index 00000000..f0932b7a --- /dev/null +++ b/src/Amalgam/FastEMath.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include +#include + +#include "fast_log/src/exp_table.h" + +//code below from fast_log library with modifications for error checking and performance +//see fast_log directory for original code and details + +inline double FastExp(double x) +{ + if(x != x) + return std::numeric_limits::quiet_NaN(); + + int64_t offset = static_cast(x); + //x now only contains the fractional part + x -= offset; + + offset += 710; + if(offset < 0) + return 0; + if(offset >= 1420) + return std::numeric_limits::infinity(); + + // Use a 4-part polynomial to approximate exp(x); + double c[] = { 0.28033708, 0.425302, 1.01273643, 1.00020947 }; + + // Use Horner's method to evaluate the polynomial. + double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0]))); + return val * EXP_TABLE[offset]; +} + +template +inline To bit_cast(const From &src) +{ + To dst; + std::memcpy(&dst, &src, sizeof(To)); + return dst; +} + +//returns the exponent and a normalized mantissa with the relationship: +//[a * 2^b] = x +inline std::pair FastFrexp(double x) +{ + uint64_t bits = bit_cast(x); + if(bits == 0) + return {0., 0}; + + // See: + // https://en.wikipedia.org/wiki/IEEE_754#Basic_and_interchange_formats + + // Extract the 52-bit mantissa field. + uint64_t mantissa = bits & 0xFFFFFFFFFFFFF; + bits >>= 52; + + // Extract the 11-bit exponent field, and add the bias. + int exponent = int(bits & 0x7ff) - 1023; + bits >>= 11; + + // Extract the sign bit. + uint64_t sign = bits; + bits >>= 1; + + // Construct the normalized double; + uint64_t res = sign; + res <<= 11; + res |= 1023 - 1; + res <<= 52; + res |= mantissa; + + double frac = bit_cast(res); + return { frac, exponent + 1 }; +} + +inline double FastLog(double x) +{ + /// Extract the fraction, and the power-of-two exponent. + + auto a = FastFrexp(x); + x = a.first; + int pow2 = a.second; + + // Use a 4-part polynom to approximate log2(x); + double c[] = { 1.33755322, -4.42852392, 6.30371424, -3.21430967 }; + double log2 = 0.6931471805599453; + + // Use Horner's method to evaluate the polynomial. + double val = c[3] + x * (c[2] + x * (c[1] + x * (c[0]))); + + // Compute log2(x), and convert the result to base-e. + return log2 * (pow2 + val); +} diff --git a/src/Amalgam/FastMath.h b/src/Amalgam/FastMath.h new file mode 100644 index 00000000..e1dfe13b --- /dev/null +++ b/src/Amalgam/FastMath.h @@ -0,0 +1,188 @@ +#pragma once + +//system headers: +#include +#include +#include + +#if !defined(_MSC_VER) +#define __forceinline __attribute__((always_inline)) inline +#endif + +#include "FastEMath.h" + +//On some platforms, std::isnan creates a costly function call. This is correct and at least as fast or faster. 
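Before the NaN helpers below, a quick illustration of how the FastEMath approximations above are meant to be used: a spot-check harness comparing them against the standard library. The main() driver, the sample points, and the include path are illustrative assumptions, not part of the patch.

#include <cmath>
#include <cstdio>
#include "FastEMath.h" //assumed include path; adjust to where the header lands in the build

int main()
{
	//spot-check the polynomial approximations against the standard library on a few sample points
	for(double x : {0.25, 1.0, 4.0, 20.0})
	{
		std::printf("exp(%g): std=%.12g fast=%.12g\n", x, std::exp(x), FastExp(x));
		std::printf("log(%g): std=%.12g fast=%.12g\n", x, std::log(x), FastLog(x));
	}
	return 0;
}
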
+template<typename T>
+constexpr bool FastIsNaN(const T n)
+{
+	return n != n;
+}
+
+//returns true if both are equal, also counting both being NaN
+template<typename T>
+constexpr bool EqualIncludingNaN(const T a, const T b)
+{
+	return (a == b) || (FastIsNaN(a) && FastIsNaN(b));
+}
+
+//like EqualIncludingNaN, but for containers that require an object
+class DoubleNanHashComparator
+{
+public:
+	constexpr bool operator()(const double a, double b) const
+	{
+		return EqualIncludingNaN(a, b);
+	}
+};
+
+//raises base to a nonnegative integer exponent
+__forceinline double FastPowIntegerNonNegativeExp(double base, int64_t exponent)
+{
+	double r = 1.0;
+	while(exponent != 0)
+	{
+		if((exponent & 1) != 0)
+			r *= base;
+
+		base *= base;
+		exponent >>= 1;
+	}
+	return r;
+}
+
+__forceinline double FastPowApplyFractionalPartOfExponent(double value_raised_to_integer_power, double base, double fraction_part_of_exponent)
+{
+	int64_t base_as_raw_int = *(reinterpret_cast<int64_t *>(&base));
+	int64_t result_as_raw_int = static_cast<int64_t>((fraction_part_of_exponent * (base_as_raw_int - 4606921280493453312LL)) + 4606921280493453312LL);
+	return value_raised_to_integer_power * (*reinterpret_cast<double *>(&result_as_raw_int));
+}
+
+//Same as FastPow() but assumes the exponent is not zero
+// note: no need to check if exponent==0 since we don't use FastPow in p=0 flow, and we never allow negative base since we always pass in the abs diff
+inline double FastPowNonZeroExp(double base, double exponent)
+{
+	if(base == 0.0)
+		return 0;
+
+	if(exponent >= 0)
+	{
+		//find the fraction of the exponent
+		int64_t abs_int_exp = static_cast<int64_t>(exponent);
+		double fraction_part_of_exponent = exponent - abs_int_exp;
+
+		double r = FastPowIntegerNonNegativeExp(base, abs_int_exp);
+		if(fraction_part_of_exponent == 0.0)
+			return r;
+
+		return FastPowApplyFractionalPartOfExponent(r, base, fraction_part_of_exponent);
+	}
+	else //negative exponent
+	{
+		//not a common value, so only check if we already know the exponent is negative
+		if(exponent == -std::numeric_limits<double>::infinity())
+			return 0;
+
+		exponent = -exponent;
+
+		//find the fraction of the exponent
+		int64_t abs_int_exp = static_cast<int64_t>(exponent);
+		double fraction_part_of_exponent = exponent - abs_int_exp;
+
+		double r = FastPowIntegerNonNegativeExp(base, abs_int_exp);
+		if(fraction_part_of_exponent != 0.0)
+			r = FastPowApplyFractionalPartOfExponent(r, base, fraction_part_of_exponent);
+
+		return 1.0 / r;
+	}
+}
+
+//faster but less accurate replacement for std::pow
+// based on the algorithm outlined by Martin Ankerl on his blog posts here:
+// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp/
+// https://martin.ankerl.com/2007/10/04/optimized-pow-approximation-for-java-and-c-c/
+// and https://martin.ankerl.com/2007/02/11/optimized-exponential-functions-for-java/
+// which are based on the exponential approximation in the paper
+// Schraudolph, Nicol N. "A fast, compact approximation of the exponential function." Neural Computation 11.4 (1999): 853-862.
+// where pow is found by solving and optimizing the ln and exp functions in the paper via a^b = e^(ln(a^b)) = e^(ln(a) * b) +// also improves the approximation (at a cost of speed) by using exponentiation by squaring +// because the results appear to be monotonic and relatively close for a wide range of values, including small and larg exponents +// this seems to be acceptable for many calculations +inline double FastPow(double base, double exponent) +{ + if(base == 0.0) + return 0.0; + if(exponent == 0.0) + return 1.0; + if(base < 0 && std::abs(exponent) < 1) + return std::numeric_limits::quiet_NaN(); + + return FastPowNonZeroExp(base, exponent); +} + +//fast replacement for std::pow, optimized for raising many numbers +//to the same exponent +class RepeatedFastPow +{ +public: + inline RepeatedFastPow() + { + SetExponent(1.0); + } + + inline RepeatedFastPow(double _exponent) + { + SetExponent(_exponent); + } + + inline void SetExponent(double _exponent) + { + exponent = _exponent; + + double abs_exponent = std::abs(exponent); + + absoluteIntegerExponent = static_cast(abs_exponent); + fractionPartOfExponent = abs_exponent - absoluteIntegerExponent; + } + + inline double FastPow(double base) + { + if(base == 0.0) + return 0.0; + if(exponent == 0.0) + return 1.0; + if(base < 0 && std::abs(exponent) < 1) + return std::numeric_limits::quiet_NaN(); + + return FastPowNonZeroExpNonzeroBase(base); + } + +protected: + + inline double FastPowNonZeroExpNonzeroBase(double base) + { + if(exponent >= 0) + { + double r = FastPowIntegerNonNegativeExp(base, absoluteIntegerExponent); + if(fractionPartOfExponent == 0.0) + return r; + + return FastPowApplyFractionalPartOfExponent(r, base, fractionPartOfExponent); + } + else //negative exponent + { + //not a common value, so only check if we already know the exponent is negative + if(exponent == -std::numeric_limits::infinity()) + return 0; + + double r = FastPowIntegerNonNegativeExp(base, absoluteIntegerExponent); + if(fractionPartOfExponent != 0.0) + r = FastPowApplyFractionalPartOfExponent(r, base, fractionPartOfExponent); + + return 1.0 / r; + } + } + + double exponent; + int64_t absoluteIntegerExponent; + double fractionPartOfExponent; +}; diff --git a/src/Amalgam/FilenameEscapeProcessor.h b/src/Amalgam/FilenameEscapeProcessor.h new file mode 100644 index 00000000..81fb1c1a --- /dev/null +++ b/src/Amalgam/FilenameEscapeProcessor.h @@ -0,0 +1,166 @@ +#pragma once + +//project headers: +#include "PlatformSpecific.h" + +//system headers: +#include + +class FilenameEscapeProcessor +{ +public: + static const char escape_char = '_'; + static const size_t num_bytes_per_char = 1; + static const size_t num_hex_values_per_char = 2 * num_bytes_per_char; + static_assert(num_hex_values_per_char == 2, "hex string escaping only supports 2 hex per char for now (requires some generalization of member functions beyond 2)"); + + //returns true if the char c is safe to leave unescaped in a filename string for amalgam + //safe characters: [0-9][a-z][A-Z] + //Devnote: character ranges given as numeric as the function only works if '0' < '9', '9' < 'a', etc. 
+ //The static_asserts give no performance impact, but basically assert that the above is true AND the desired characters are the actual values specified + static bool IsUnescapedCharSafe(const char c) + { + // lower bound limit // + static_assert('0' == 48, "inconsistent character values."); + if(c < 48) //'0' + return false; + + // ranges // + //0-9 + static_assert('9' == 57, "inconsistent character values."); + if(c <= 57) //'9' + return true; + + //reject between '9' and 'A' + static_assert('A' == 65, "inconsistent character values."); + if(c < 65) //'A' + return false; + + //A-Z + static_assert('Z' == 90, "inconsistent character values."); + if(c <= 90) //'Z' + return true; + + //reject between 'Z' and 'a' + static_assert('a' == 97, "inconsistent character values."); + if(c < 97) //'a' + return false; + + //a-z + static_assert('z' == 122, "inconsistent character values."); + if(c <= 122) //'z' + return true; + + //anything beyond 122 'z' is rejected + return false; + } + + //converts a [0-15] (4-bit) value to a single char of its associated hexidecimal character + static char DecimalToHex(const uint8_t c) + { + assert(c < 16); //value must be 4-bits only + + if(c >= 10) + return c - 10 + 'a'; + + return c + '0'; + } + + static constexpr uint8_t HexToDecimal(const char c) + { + if(c >= '0') + { + if(c <= '9') + return c - '0'; + + if(c >= 'a' && c <= 'f') + return c - 'a' + 10; + if(c >= 'A' && c <= 'F') + return c - 'A' + 10; + } + + //invalid and possibly unsafe char is not a hex value, return 0 as having no value + return 0; + } + + //generates the 2 escape hex characters for a given an 8-bit character + static void GetEscapeHexFromCharValue(char c, char &high_out, char &low_out) + { + low_out = DecimalToHex(15 & c); + high_out = DecimalToHex(15 & (c >> 4)); + } + + //Get the 8-bit value represented by 2 4-bit hex characters + static constexpr char GetCharValueFromEscapeHex(const char high, const char low) + { + return HexToDecimal(low) + ((HexToDecimal(high) << 4) & 240); + } + + //returns a copy of string where all potentially unsafe characters are escaped + //see IsUnescapedCharSafe() for list of safe characters + static std::string SafeEscapeFilename(const std::string &string) + { + std::string out; + + if(string.length() == 0) + return out; + + char escape_buffer[1 + num_hex_values_per_char]; //store 1 escape char '_' + 2 hex digit chars per byte + escape_buffer[0] = escape_char; + + for(const auto a : string) + { + if(IsUnescapedCharSafe(a)) + out += a; + else + { + GetEscapeHexFromCharValue(a, escape_buffer[1], escape_buffer[2]); + out.append(&escape_buffer[0], sizeof(escape_buffer)); + } + } + + return out; + } + + //returns a copy of string where escaped characters are converted back to their 8-bit values + //any character sequence _xx is converted to a single 8-bit character using xx as the hex code + static std::string SafeUnescapeFilename(const std::string &string) + { + std::string out; + + if(string.length() == 0) + return out; + + uint8_t escape_index = 0; + char escape_hex[num_hex_values_per_char] = { 0, 0 }; + for(const auto a : string) + { + if(a == escape_char) + { + assert(escape_index == 0); //must complete a previous escape before starting a new one + escape_index = num_hex_values_per_char; + } + else + { + if(escape_index > 0) + { + escape_index--; + escape_hex[escape_index] = a; //filled backwards for speed (see below) + + if(escape_index == 0) + { + out += GetCharValueFromEscapeHex(escape_hex[1], escape_hex[0]); //escape hex is filled backwards so swap 
digits + escape_hex[0] = 0; + escape_hex[1] = 0; + } + } + else + { + out += a; + } + } + } + + return out; + } +}; diff --git a/src/Amalgam/GeneralizedDistance.h b/src/Amalgam/GeneralizedDistance.h new file mode 100644 index 00000000..ad592e09 --- /dev/null +++ b/src/Amalgam/GeneralizedDistance.h @@ -0,0 +1,827 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeTreeManipulation.h" +#include "FastMath.h" + +//system headers: +#include +#include + +//If defined, will use the Laplace LK metric (default). Otherwise will use Gaussian. +#define DISTANCE_USE_LAPLACE_LK_METRIC true + +//general class of feature comparisons +// align at 64-bits in order to play nice with data alignment where it is used +enum FeatureDifferenceType : uint64_t +{ + FDT_NOMINAL, + //continuous, but without cycles + FDT_CONTINUOUS_NUMERIC, + //like FDT_CONTINUOUS_NUMERIC, but guarantees everything is always numeric + FDT_CONTINUOUS_UNIVERSALLY_NUMERIC, + //like FDT_CONTINUOUS_NUMERIC, but has cycles + FDT_CONTINUOUS_NUMERIC_CYCLIC, + //edit distance between strings + FDT_CONTINUOUS_STRING, + //continuous measures of the number of nodes different between two sets of code + FDT_CONTINUOUS_CODE, +}; + +//base data struct for holding distance parameters and metadata +//generalizes Minkowski distance, information theoretic surprisal as a distance, and LukaszykKarmowski +class GeneralizedDistance +{ +public: + //initialization functions + + //dynamically precompute and cache nominal deltas and defaults everytime the pValue is set + inline void SetAndConstrainParams() + { + inversePValue = 1.0 / pValue; + + ComputeNominalDistanceTerms(); + + bool compute_approximate = NeedToPrecomputeApproximate(); + if(compute_approximate) + { + fastPowP = RepeatedFastPow(pValue); + fastPowInverseP = RepeatedFastPow(inversePValue); + } + + //default to the accuracy that should be used first + if(recomputeAccurateDistances) + SetHighAccuracy(false); + else + SetHighAccuracy(highAccuracy); + } + + //update usingHighAccuracy and nominal defaults + inline void SetHighAccuracy(bool high_accuracy) + { + //need to have asked for high_accuracy and have computed high accuracy + // or just not have computed low accuracy at all + if( (high_accuracy && NeedToPrecomputeAccurate())) + defaultPrecision = ExactApproxValuePair::EXACT; + else + defaultPrecision = ExactApproxValuePair::APPROX; + } + + //computes and sets unknownToUnknownDistanceTerm and knownToUnknownDistanceTerm based on + // unknownToUnknownDifference and knownToUnknownDifference respectively + inline void ComputeAndStoreUncertaintyDistanceTerms(size_t index) + { + bool compute_accurate = NeedToPrecomputeAccurate(); + bool compute_approximate = NeedToPrecomputeApproximate(); + + auto &feature_params = featureParams[index]; + + //compute unknownToUnknownDistanceTerm + if(compute_accurate) + { + feature_params.unknownToUnknownDistanceTerm.SetValue( + ComputeDistanceTermNonNull(feature_params.unknownToUnknownDifference, + index, ExactApproxValuePair::EXACT), + ExactApproxValuePair::EXACT); + } + + if(compute_approximate) + { + feature_params.unknownToUnknownDistanceTerm.SetValue( + ComputeDistanceTermNonNull(feature_params.unknownToUnknownDifference, + index, ExactApproxValuePair::APPROX), + ExactApproxValuePair::APPROX); + } + + //if knownToUnknownDifference is same as unknownToUnknownDifference, can copy distance term instead of recomputing + if(feature_params.knownToUnknownDifference == feature_params.unknownToUnknownDifference) + { + 
feature_params.knownToUnknownDistanceTerm = feature_params.unknownToUnknownDistanceTerm; + return; + } + + //compute knownToUnknownDistanceTerm + if(compute_accurate) + { + feature_params.knownToUnknownDistanceTerm.SetValue( + ComputeDistanceTermNonNull(feature_params.knownToUnknownDifference, + index, ExactApproxValuePair::EXACT), + ExactApproxValuePair::EXACT); + } + + if(compute_approximate) + { + feature_params.knownToUnknownDistanceTerm.SetValue( + ComputeDistanceTermNonNull(feature_params.knownToUnknownDifference, + index, ExactApproxValuePair::APPROX), + ExactApproxValuePair::APPROX); + } + } + + // 2/sqrt(pi) = 2.0 / std::sqrt(3.141592653589793238462643383279502884L); + static constexpr double s_two_over_sqrt_pi = 1.12837916709551257390; + + //sqrt(2.0) + static constexpr double s_sqrt_2 = 1.41421356237309504880; + + __forceinline static double ComputeDeviationPartLaplace(const double diff, const double deviation) + { + return std::exp(-diff / deviation) * (3 * deviation + diff) / 2; + } + + __forceinline static double ComputeDeviationPartLaplaceApprox(const double diff, const double deviation) + { + return FastExp(-diff / deviation) * (3 * deviation + diff) / 2; + } + + __forceinline static double ComputeDeviationPartGaussian(const double diff, const double deviation) + { + const double term = diff / (2.0 * deviation); //diff / (2*sigma) + return s_two_over_sqrt_pi * deviation * std::exp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma)) + } + + __forceinline static double ComputeDeviationPartGaussianApprox(const double diff, const double deviation) + { + const double term = diff / (2.0 * deviation); //diff / (2*sigma) + return s_two_over_sqrt_pi * deviation * FastExp(-term * term) - diff * std::erfc(term); //2*sigma*(e^(-1*(diff^2)/((2*simga)^2)))/sqrt(pi) - diff*erfc(diff/(2*sigma)) + } + + //computes the LukaszykKarmowski metric deviation component for the minkowski distance equation given the feature difference and feature deviation + //assumes deviation is nonnegative + __forceinline double ComputeDeviationPart(const double diff, const double deviation) + { + if(defaultPrecision == ExactApproxValuePair::EXACT) + #ifdef DISTANCE_USE_LAPLACE_LK_METRIC + return ComputeDeviationPartLaplace(diff, deviation); + #else + return ComputeDeviationPartGaussian(diff, deviation); + #endif + else + #ifdef DISTANCE_USE_LAPLACE_LK_METRIC + return ComputeDeviationPartLaplaceApprox(diff, deviation); + #else + return ComputeDeviationPartGaussianApprox(diff, deviation); + #endif + } + + //constrains the difference to the cycle length for cyclic distances + __forceinline static double ConstrainDifferenceToCyclicDifference(double difference, double cycle_length) + { + //cyclics that are less than a cycle apart, the distance is the closer of: calculated distance or the looped distance of cycle length - calculated distance + //for distances that are larger than a cycle, reduce it by taking the mod of it and do the same type of comparison + if(difference > cycle_length) + difference = std::fmod(difference, cycle_length); + + return std::min(difference, cycle_length - difference); + } + +protected: + + constexpr bool NeedToPrecomputeApproximate() + { + return (!highAccuracy || recomputeAccurateDistances); + } + + constexpr bool NeedToPrecomputeAccurate() + { + return (highAccuracy || recomputeAccurateDistances); + } + + //stores a pair of exact and approximate values + // which can be referenced by getting the value at the 
corresponding offset + //the values default to 0.0 on initialization + class ExactApproxValuePair + { + public: + //offset for each precision level + static constexpr int APPROX = 0; + static constexpr int EXACT = 1; + + __forceinline ExactApproxValuePair(double initial_value = 0.0) + { + exactApproxPair = { initial_value, initial_value }; + } + + constexpr double GetValue(int offset) + { + return exactApproxPair[offset]; + } + + __forceinline void SetValue(double value, int offset) + { + exactApproxPair[offset] = value; + } + + std::array exactApproxPair; + }; + + //update cached nominal deltas based on highAccuracy and recomputeAccurateDistances, caching what is needed given those flags + inline void ComputeNominalDistanceTerms() + { + bool compute_accurate = NeedToPrecomputeAccurate(); + bool compute_approximate = NeedToPrecomputeApproximate(); + + //infinite pValue means take max or min, so just use 1 for computations below, + // and term aggregation (outside of this function) will take care of the rest + double effective_p_value = pValue; + if(pValue == std::numeric_limits::infinity() || pValue == -std::numeric_limits::infinity()) + effective_p_value = 1; + + const size_t num_features = featureParams.size(); + + //value of delta for nominal values when not using high accuracy, may be not exactly 1.0 due to using FastPow approximation for effective_p_values that aren't 1 + double nominal_approximate_diff = 1.0; + if(compute_approximate) + nominal_approximate_diff = ( (effective_p_value == 1) ? 1.0 : FastPowNonZeroExp(1.0, effective_p_value) ); + + for(size_t i = 0; i < num_features; i++) + { + auto &feat_params = featureParams[i]; + if(feat_params.featureType != FDT_NOMINAL) + continue; + + double weight = feat_params.weight; + + if(!DoesFeatureHaveDeviation(i)) + { + if(compute_accurate) + { + feat_params.nominalMatchDistanceTerm.SetValue(0.0, ExactApproxValuePair::EXACT); + + if(pValue != 0) + feat_params.nominalNonMatchDistanceTerm.SetValue(weight, ExactApproxValuePair::EXACT); + else //1.0 to any power is still 1.0 when computed exactly + feat_params.nominalNonMatchDistanceTerm.SetValue(1.0, ExactApproxValuePair::EXACT); + } + + if(compute_approximate) + { + feat_params.nominalMatchDistanceTerm.SetValue(0.0, ExactApproxValuePair::APPROX); + + if(effective_p_value != 0) + feat_params.nominalNonMatchDistanceTerm.SetValue(weight * nominal_approximate_diff, ExactApproxValuePair::APPROX); + else //pValue == 0 + feat_params.nominalNonMatchDistanceTerm.SetValue(FastPow(1.0, weight), ExactApproxValuePair::APPROX); + } + } + else //has deviations + { + double deviation = feat_params.deviation; + double nominal_count = feat_params.typeAttributes.nominalCount; + + // n = number of nominal classes + // match: deviation ^ p * weight + // non match: (deviation + (1 - deviation) / (n - 1)) ^ p * weight + //if there is only one nominal class, the smallest delta value it could be is the specified smallest delta, otherwise it's 1.0 + constexpr double smallest_delta = 1e-100; + if(nominal_count == 1 && deviation < smallest_delta) + deviation = smallest_delta; + + double mismatch_deviation = 1.0; + if(nominal_count > 1) + mismatch_deviation = (deviation + (1 - deviation) / (nominal_count - 1)); + + if(compute_accurate) + { + if(effective_p_value == 1) + { + feat_params.nominalMatchDistanceTerm.SetValue(deviation * weight, ExactApproxValuePair::EXACT); + feat_params.nominalNonMatchDistanceTerm.SetValue(mismatch_deviation * weight, ExactApproxValuePair::EXACT); + } + else if(effective_p_value != 0) + { 
+ feat_params.nominalMatchDistanceTerm.SetValue(std::pow(deviation, effective_p_value) * weight, ExactApproxValuePair::EXACT); + feat_params.nominalNonMatchDistanceTerm.SetValue(std::pow(mismatch_deviation, effective_p_value) * weight, ExactApproxValuePair::EXACT); + } + else //pValue == 0 + { + feat_params.nominalMatchDistanceTerm.SetValue(std::pow(deviation, weight), ExactApproxValuePair::EXACT); + feat_params.nominalNonMatchDistanceTerm.SetValue(std::pow(mismatch_deviation, weight), ExactApproxValuePair::EXACT); + } + } + + if(compute_approximate) + { + if(effective_p_value == 1) + { + feat_params.nominalMatchDistanceTerm.SetValue(deviation * weight, ExactApproxValuePair::APPROX); + feat_params.nominalNonMatchDistanceTerm.SetValue(mismatch_deviation * weight, ExactApproxValuePair::APPROX); + } + else if(effective_p_value != 0) + { + feat_params.nominalMatchDistanceTerm.SetValue(FastPow(deviation, effective_p_value) * weight, ExactApproxValuePair::APPROX); + feat_params.nominalNonMatchDistanceTerm.SetValue(FastPow(mismatch_deviation, effective_p_value) * weight, ExactApproxValuePair::APPROX); + } + else //pValue == 0 + { + feat_params.nominalMatchDistanceTerm.SetValue(FastPow(deviation, weight), ExactApproxValuePair::APPROX); + feat_params.nominalNonMatchDistanceTerm.SetValue(FastPow(mismatch_deviation, weight), ExactApproxValuePair::APPROX); + } + } + } + } + } + +public: + + //query functions + + //returns true if the feature has a nonzero weight + __forceinline bool IsFeatureEnabled(size_t feature_index) + { + return (featureParams[feature_index].weight > 0.0); + } + + //returns true if the feature is nominal + __forceinline bool IsFeatureNominal(size_t feature_index) + { + return (featureParams[feature_index].featureType == FDT_NOMINAL); + } + + //returns true if the feature is cyclic + __forceinline bool IsFeatureCyclic(size_t feature_index) + { + return (featureParams[feature_index].featureType == FDT_CONTINUOUS_NUMERIC_CYCLIC); + } + + //returns true if the feature has a deviation + __forceinline bool DoesFeatureHaveDeviation(size_t feature_index) + { + return (featureParams[feature_index].deviation > 0); + } + + //returns true if a known to unknown distance term would be less than or same as an exact match + // based on the difference versus deviation + __forceinline bool IsKnownToUnknownDistanceLessThanOrEqualToExactMatch(size_t feature_index) + { + auto &feature_params = featureParams[feature_index]; + return (feature_params.knownToUnknownDifference <= feature_params.deviation); + } + + //computes the exponentiation of d to 1/p + __forceinline double InverseExponentiateDistance(double d) + { + if(pValue == 1) + return d; + + if(pValue == 0.5) + return d * d; + + if(defaultPrecision == ExactApproxValuePair::EXACT) + return std::pow(d, inversePValue); + else + return fastPowInverseP.FastPow(d); + } + + //computes the exponentiation of d to p given precision being from ExactApproxValuePair + __forceinline double ExponentiateDifferenceTerm(double d, int precision) + { + if(pValue == 1) + return d; + + if(pValue == 2) + return d * d; + + if(precision == ExactApproxValuePair::EXACT) + return std::pow(d, pValue); + else + return fastPowP.FastPow(d); + } + + //returns the maximum difference + inline double GetMaximumDifference(size_t index) + { + auto &feature_params = featureParams[index]; + switch(feature_params.featureType) + { + case FDT_NOMINAL: + return 1.0; + + case FDT_CONTINUOUS_NUMERIC_CYCLIC: + return feature_params.typeAttributes.maxCyclicDifference / 2; + + default: + 
if(feature_params.weight > 0) + return std::numeric_limits::infinity(); + else + return -std::numeric_limits::infinity(); + } + } + //computes the distance term for a nominal when two nominals are equal + __forceinline double ComputeDistanceTermNominalExactMatch(size_t index) + { + return featureParams[index].nominalMatchDistanceTerm.GetValue(defaultPrecision); + } + + //computes the distance term for a nominal when two nominals are not equal + __forceinline double ComputeDistanceTermNominalNonMatch(size_t index) + { + return featureParams[index].nominalNonMatchDistanceTerm.GetValue(defaultPrecision); + } + + //computes the distance term for an unknown-unknown + __forceinline double ComputeDistanceTermUnknownToUnknown(size_t index) + { + return featureParams[index].unknownToUnknownDistanceTerm.GetValue(defaultPrecision); + } + + //computes the distance term for an known-unknown + __forceinline double ComputeDistanceTermKnownToUnknown(size_t index) + { + return featureParams[index].knownToUnknownDistanceTerm.GetValue(defaultPrecision); + } + + //computes the inner term for a non-nominal with an exact match of values + __forceinline double ComputeDistanceTermNonNominalExactMatch(size_t index) + { + if(!DoesFeatureHaveDeviation(index)) + return 0.0; + + //apply deviations + double diff = ComputeDeviationPart(0.0, featureParams[index].deviation); + + //exponentiate and return with weight + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //computes the difference between two values non-nominal (e.g., continuous) + __forceinline double ComputeDifferenceTermNonNominal(double diff, size_t index) + { + //compute absolute value + diff = std::abs(diff); + + //apply cyclic wrapping + if(IsFeatureCyclic(index)) + diff = ConstrainDifferenceToCyclicDifference(diff, featureParams[index].typeAttributes.maxCyclicDifference); + + //apply deviations + if(DoesFeatureHaveDeviation(index)) + diff += ComputeDeviationPart(diff, featureParams[index].deviation); + + return diff; + } + + //computes the difference between two values non-nominal (e.g., continuous) that isn't cyclic + __forceinline double ComputeDifferenceTermNonNominalNonCyclic(double diff, size_t index) + { + //compute absolute value + diff = std::abs(diff); + + //apply deviations + if(DoesFeatureHaveDeviation(index)) + diff += ComputeDeviationPart(diff, featureParams[index].deviation); + + return diff; + } + + //computes the distance term for a non-nominal (e.g., continuous) for p non-zero and non-infinite with no nulls + // diff can be negative + __forceinline double ComputeDistanceTermNonNominalNonNullRegular(double diff, size_t index) + { + diff = ComputeDifferenceTermNonNominal(diff, index); + + //exponentiate and return with weight + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //computes the distance term for a non-nominal (e.g., continuous) for p non-zero and non-infinite with max of one null + // diff can be negative + __forceinline double ComputeDistanceTermNonNominalOneNonNullRegular(double diff, size_t index) + { + diff = ComputeDifferenceTermNonNominal(diff, index); + + //exponentiate and return with weight + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //computes the distance term for a non-nominal (e.g., continuous) for p non-zero and non-infinite that isn't cyclic with no nulls + // diff can be negative + __forceinline double ComputeDistanceTermNonNominalNonCyclicNonNullRegular(double 
diff, size_t index) + { + diff = ComputeDifferenceTermNonNominalNonCyclic(diff, index); + + //exponentiate and return with weight + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //computes the distance term for a non-nominal (e.g., continuous) for p non-zero and non-infinite that isn't cyclic with max of one null + // diff can be negative + __forceinline double ComputeDistanceTermNonNominalNonCyclicOneNonNullRegular(double diff, size_t index) + { + if(FastIsNaN(diff)) + return ComputeDistanceTermKnownToUnknown(index); + + diff = ComputeDifferenceTermNonNominalNonCyclic(diff, index); + + //exponentiate and return with weight + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //computes the inner term of the Minkowski norm summation for a single index for p=0 + __forceinline double ComputeDistanceTermP0(EvaluableNodeImmediateValue a, EvaluableNodeImmediateValue b, + EvaluableNodeImmediateValueType a_type, EvaluableNodeImmediateValueType b_type, size_t index) + { + double diff = ComputeDifference(a, b, a_type, b_type, featureParams[index].featureType); + if(FastIsNaN(diff)) + return LookupNullDistanceTerm(a, b, a_type, b_type, index); + + //if nominal, don't need to compute absolute value of diff because just need to compare to 0 + if(IsFeatureNominal(index)) + return (diff == 0.0) ? ComputeDistanceTermNominalExactMatch(index) : ComputeDistanceTermNominalNonMatch(index); + + diff = ComputeDifferenceTermNonNominal(diff, index); + + return std::pow(diff, featureParams[index].weight); + } + + //computes the inner term of the Minkowski norm summation for a single index for p=infinity or -infinity + __forceinline double ComputeDistanceTermPInf(EvaluableNodeImmediateValue a, EvaluableNodeImmediateValue b, + EvaluableNodeImmediateValueType a_type, EvaluableNodeImmediateValueType b_type, size_t index) + { + double diff = ComputeDifference(a, b, a_type, b_type, featureParams[index].featureType); + if(FastIsNaN(diff)) + return LookupNullDistanceTerm(a, b, a_type, b_type, index); + + //if nominal, don't need to compute absolute value of diff because just need to compare to 0 + if(IsFeatureNominal(index)) + return (diff == 0.0) ? ComputeDistanceTermNominalExactMatch(index) : ComputeDistanceTermNominalNonMatch(index); + + diff = ComputeDifferenceTermNonNominal(diff, index); + + return diff * featureParams[index].weight; + } + + //computes the inner term of the Minkowski norm summation for a single index regardless of pValue + __forceinline double ComputeDistanceTermNonNull(double diff, size_t index, int precision) + { + if(!IsFeatureNominal(index)) + diff = ComputeDifferenceTermNonNominal(diff, index); + + if(pValue == 0.0) + return std::pow(diff, featureParams[index].weight); + else if(pValue == std::numeric_limits::infinity() + || pValue == -std::numeric_limits::infinity()) + return diff * featureParams[index].weight; + else + return ExponentiateDifferenceTerm(diff, precision) * featureParams[index].weight; + } + + //computes the inner term of the Minkowski norm summation for a single index for p non-zero and non-infinite + //where at least one of the values is non-null + __forceinline double ComputeDistanceTermRegularOneNonNull(double diff, size_t index) + { + if(FastIsNaN(diff)) + return ComputeDistanceTermKnownToUnknown(index); + + //if nominal, don't need to compute absolute value of diff because just need to compare to 0 + if(IsFeatureNominal(index)) + return (diff == 0.0) ? 
ComputeDistanceTermNominalExactMatch(index) : ComputeDistanceTermNominalNonMatch(index); + + return ComputeDistanceTermNonNominalNonNullRegular(diff, index); + } + + //computes the inner term of the Minkowski norm summation for a single index for p non-zero and non-infinite + __forceinline double ComputeDistanceTermRegular(EvaluableNodeImmediateValue a, EvaluableNodeImmediateValue b, + EvaluableNodeImmediateValueType a_type, EvaluableNodeImmediateValueType b_type, size_t index) + { + double diff = ComputeDifference(a, b, a_type, b_type, featureParams[index].featureType); + if(FastIsNaN(diff)) + return LookupNullDistanceTerm(a, b, a_type, b_type, index);; + + //if nominal, don't need to compute absolute value of diff because just need to compare to 0 + if(IsFeatureNominal(index)) + return (diff == 0.0) ? ComputeDistanceTermNominalExactMatch(index) : ComputeDistanceTermNominalNonMatch(index); + + return ComputeDistanceTermNonNominalNonNullRegular(diff, index); + } + + //computes the inner term of the Minkowski norm summation for a single index that isn't null, + //but computes only from the distance (does not take into account feature measurement type) + __forceinline double ComputeDistanceTermFromNonNullDifferenceOnly(double diff, size_t index) + { + if(pValue == 0.0) + return std::pow(diff, featureParams[index].weight); + else if(pValue == std::numeric_limits::infinity() + || pValue == -std::numeric_limits::infinity()) + return diff * featureParams[index].weight; + else + return ExponentiateDifferenceTerm(diff, defaultPrecision) * featureParams[index].weight; + } + + //returns the distance term for the either one or two unknown values + __forceinline double LookupNullDistanceTerm(EvaluableNodeImmediateValue a, EvaluableNodeImmediateValue b, + EvaluableNodeImmediateValueType a_type, EvaluableNodeImmediateValueType b_type, size_t index) + { + bool a_unknown = (a_type == ENIVT_NULL || (a_type == ENIVT_NUMBER && FastIsNaN(a.number))); + bool b_unknown = (b_type == ENIVT_NULL || (b_type == ENIVT_NUMBER && FastIsNaN(b.number))); + if(a_unknown && b_unknown) + return ComputeDistanceTermUnknownToUnknown(index); + if(a_unknown || b_unknown) + return ComputeDistanceTermKnownToUnknown(index); + + //incompatible types, use whichever is further + return std::max(ComputeDistanceTermUnknownToUnknown(index), ComputeDistanceTermKnownToUnknown(index)); + } + + //computes the difference between a and b given their types and the distance_type and the feature difference type + __forceinline static double ComputeDifference(EvaluableNodeImmediateValue a, EvaluableNodeImmediateValue b, + EvaluableNodeImmediateValueType a_type, EvaluableNodeImmediateValueType b_type, FeatureDifferenceType feature_type) + { + if(feature_type == FDT_CONTINUOUS_NUMERIC || feature_type == FDT_CONTINUOUS_UNIVERSALLY_NUMERIC + || feature_type == FDT_CONTINUOUS_NUMERIC_CYCLIC) + { + if(a_type == ENIVT_NUMBER && b_type == ENIVT_NUMBER) + return a.number - b.number; + + if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID) + return (a.stringID == b.stringID ? 0.0 : 1.0); + + return std::numeric_limits::quiet_NaN(); + } + + if(a_type == ENIVT_NULL || b_type == ENIVT_NULL) + return std::numeric_limits::quiet_NaN(); + + if(feature_type == FDT_NOMINAL) + { + if(a_type == ENIVT_NUMBER && b_type == ENIVT_NUMBER) + return (a.number == b.number ? 0.0 : 1.0); + + if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID) + return (a.stringID == b.stringID ? 
0.0 : 1.0); + + if(a_type == ENIVT_CODE && b_type == ENIVT_CODE) + return (EvaluableNode::AreDeepEqual(a.code, b.code) ? 0.0 : 1.0); + + //don't match + return 1.0; + } + + if(feature_type == FDT_CONTINUOUS_STRING) + { + if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID) + { + auto &a_str = string_intern_pool.GetStringFromID(a.stringID); + auto &b_str = string_intern_pool.GetStringFromID(b.stringID); + return static_cast(EvaluableNodeTreeManipulation::EditDistance(a_str, b_str)); + } + + return std::numeric_limits::quiet_NaN(); + } + + //everything below is for feature_type == FDT_CONTINUOUS_CODE + + if(a_type == ENIVT_NUMBER && b_type == ENIVT_NUMBER) + return 1.0 - EvaluableNodeTreeManipulation::CommonalityBetweenNumbers(a.number, b.number); + + if(a_type == ENIVT_STRING_ID && b_type == ENIVT_STRING_ID) + return (a.stringID == b.stringID ? 0.0 : 1.0); + + if(a_type == ENIVT_CODE || b_type == ENIVT_CODE) + { + //if one isn't code, then just return the size of the other, or at least 1 + if(a_type != ENIVT_CODE) + return std::max(1.0, static_cast(EvaluableNode::GetDeepSize(b.code))); + if(b_type != ENIVT_CODE) + return std::max(1.0, static_cast(EvaluableNode::GetDeepSize(a.code))); + + return EvaluableNodeTreeManipulation::EditDistance(a.code, b.code); + } + + //different immediate types + return 1.0; + } + + //computes the Minkowski distance between vectors a and b, and respective types a_types and b_types, with Minkowski parameter p + // calling the fastest version that will work with the data provided + //a, a_types, b, and b_types must be the same length + //if weights.size() == 0, no weights are used, else weights.size() must == a.size() == b.size() + // -a multiplicative weight is applied to each a and b dimensional differences + //if nominal_dimensions.size() == 0, no nominal features are used, else nominal_features.size() must == a.size() == b.size() + // -uses featured nominal dimensions: if a dimension is marked in nominal_dimensions as true, partial distance is binary: 0.0 if equal, 1.0 otherwise + //if deviations.size() == 0, no deviations are used, else deviations.size() must == a.size() == b.size() + // -uses per-feature deviations: per-feature deviation is added after the distance between ai and bi is computed + __forceinline double ComputeMinkowskiDistance(std::vector &a, std::vector &a_types, + std::vector &b, std::vector &b_types) + { + if(a.size() != b.size()) + return std::numeric_limits::quiet_NaN(); + + if(pValue == 0.0) + { + double dist_accum = 1.0; + for(size_t i = 0; i < a.size(); i++) + dist_accum *= ComputeDistanceTermP0(a[i], b[i], a_types[i], b_types[i], i); + + return dist_accum; + } + else if(pValue == std::numeric_limits::infinity()) + { + double max_term = -std::numeric_limits::infinity(); + + for(size_t i = 0; i < a.size(); i++) + { + double term = ComputeDistanceTermPInf(a[i], b[i], a_types[i], b_types[i], i); + + if(term > max_term) + max_term = term; + } + + return max_term; + } + else if(pValue == -std::numeric_limits::infinity()) + { + double min_term = std::numeric_limits::infinity(); + + for(size_t i = 0; i < a.size(); i++) + { + double term = ComputeDistanceTermPInf(a[i], b[i], a_types[i], b_types[i], i); + + if(term < min_term) + min_term = term; + } + + return min_term; + } + else //non-extreme p-value + { + double dist_accum = 0.0; + for(size_t i = 0; i < a.size(); i++) + dist_accum += ComputeDistanceTermRegular(a[i], b[i], a_types[i], b_types[i], i); + + return InverseExponentiateDistance(dist_accum); + } + } + + class FeatureParams + { 
+ public: + inline FeatureParams() + : featureType(FDT_CONTINUOUS_NUMERIC), weight(1.0), deviation(0.0), + unknownToUnknownDistanceTerm(std::numeric_limits::quiet_NaN()), + knownToUnknownDistanceTerm(std::numeric_limits::quiet_NaN()), + unknownToUnknownDifference(std::numeric_limits::quiet_NaN()), + knownToUnknownDifference(std::numeric_limits::quiet_NaN()) + { + typeAttributes.maxCyclicDifference = std::numeric_limits::quiet_NaN(); + } + + //the type of comparison for each feature + // this type is 64-bit aligned to make sure the whole structure is aligned + FeatureDifferenceType featureType; + + //weight of the feature + double weight; + + //distance terms for nominals + ExactApproxValuePair nominalMatchDistanceTerm; + ExactApproxValuePair nominalNonMatchDistanceTerm; + + //type attributes dependent on featureType + union + { + //number of relevant nominal values + double nominalCount; + + //maximum difference value of the feature for cyclic features (NaN if unknown) + double maxCyclicDifference; + + } typeAttributes; + + //uncertainty of each value + double deviation; + + //distance term to use if both values being compared are unknown + ExactApproxValuePair unknownToUnknownDistanceTerm; + + //distance term to use if one value is known and the other is unknown + ExactApproxValuePair knownToUnknownDistanceTerm; + + //difference between two values if both are unknown (NaN if unknown) + double unknownToUnknownDifference; + + //difference between two values if one is known and the other is unknown (NaN if unknown) + double knownToUnknownDifference; + }; + + std::vector featureParams; + + //precached ways to compute FastPow + RepeatedFastPow fastPowP; + RepeatedFastPow fastPowInverseP; + + //parameter of the Lebesgue space and Minkowski distance parameter + double pValue; + //computed inverse of pValue + double inversePValue; + + //the current precision for exact vs approximate terms + int defaultPrecision; + + //if true, then all computations should be performed with high accuracy + bool highAccuracy; + //if true, then estimates should be computed with low accuracy, but final results with high accuracy + // if false, will reuse accuracy from estimates + bool recomputeAccurateDistances; +}; diff --git a/src/Amalgam/HashMaps.h b/src/Amalgam/HashMaps.h new file mode 100644 index 00000000..3a7ace2f --- /dev/null +++ b/src/Amalgam/HashMaps.h @@ -0,0 +1,46 @@ +#pragma once + +//////////////////// +// Defines hash set types in a generic way so they can be easily changed +// * * * Profile and choose whichever works fastest and with least memory * * * +// Notes about the hashes: +// std::unordered is second best for maximizing debugability (due to IDE support) but not as easy as std::map, but is slow +// ska::flat_hash is best for performance, but eats a bit of memory +// ska::bytell_hash is good for compact memory and almost as fast as ska::flat_hash (should be used for things that need to be fairly fast but are not accessed as frequently, where minimizing memory is more important) + +#ifdef USE_STL_HASH_MAPS + +#include +#include + +template, typename E = std::equal_to, typename A = std::allocator > +using FastHashSet = std::unordered_set; + +template, typename E = std::equal_to, typename A = std::allocator > > +using FastHashMap = std::unordered_map; + +template, typename E = std::equal_to, typename A = std::allocator > +using CompactHashSet = std::unordered_set; + +template, typename E = std::equal_to, typename A = std::allocator > > +using CompactHashMap = std::unordered_map; + +#else + 
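Whichever branch of this conditional is compiled, the aliases are intended as drop-in replacements for their STL counterparts, keeping the usual key, value, hash, equality, and allocator parameters. A small usage sketch under that assumption; the word-counting function, sample strings, and include path are illustrative only, not part of the library.

#include <cstdio>
#include <string>
#include "HashMaps.h" //assumed include path for the header defined in this diff

//counts word occurrences; FastHashMap is used exactly like std::unordered_map here
static void CountWords()
{
	FastHashMap<std::string, size_t> counts;
	for(const char *w : {"alpha", "beta", "alpha"})
		counts[std::string(w)] += 1;

	for(auto &[word, count] : counts)
		std::printf("%s: %zu\n", word.c_str(), count);
}

int main()
{
	CountWords();
	return 0;
}
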
+//fastest hash maps +#include "skarupke_maps/bytell_hash_map.hpp" +#include "skarupke_maps/flat_hash_map.hpp" + +template, typename E = std::equal_to, typename A = std::allocator > +using FastHashSet = ska::flat_hash_set; + +template, typename E = std::equal_to, typename A = std::allocator > > +using FastHashMap = ska::flat_hash_map; + +template, typename E = std::equal_to, typename A = std::allocator > +using CompactHashSet = ska::bytell_hash_set; + +template, typename E = std::equal_to, typename A = std::allocator > > +using CompactHashMap = ska::bytell_hash_map; + +#endif diff --git a/src/Amalgam/IntegerSet.h b/src/Amalgam/IntegerSet.h new file mode 100644 index 00000000..1ef7cf2b --- /dev/null +++ b/src/Amalgam/IntegerSet.h @@ -0,0 +1,1813 @@ +#pragma once + +//project headers: +#include "FastMath.h" +#include "PlatformSpecific.h" +#include "RandomStream.h" + +//system headers: +#include +#include +#include +#include + +//container for holding sparse integers that maximizes efficiency of interoperating +// with BitArrayIntegerSet +class SortedIntegerSet +{ +public: + + SortedIntegerSet() + { } + + template + SortedIntegerSet(const Collection &other) + { + integers.reserve(other.size()); + for(const size_t element : other) + insert(element); + } + + //defined to keep compatibility with stl containers + using value_type = size_t; + + using Iterator = std::vector::iterator; + + //assignment operator, deep copies bit buffer + inline void operator =(const SortedIntegerSet &other) + { + integers = other.integers; + } + + //std begin (must be lowercase) + __forceinline auto begin() + { + return std::begin(integers); + } + + //std end (must be lowercase) + //returns bit 0 of the lowest bucket that is not populated + __forceinline auto end() + { + return std::end(integers); + } + + //returns the nth id in the set by sorted order + size_t GetNthElement(size_t n) + { + //if asking for something too big, just return last element (size) + if(n > integers.size()) + return GetEndInteger(); + + return integers[n]; + } + + //returns a random integer + size_t GetRandomElement(RandomStream &random_stream) + { + size_t i = random_stream.RandSize(integers.size()); + return integers[i]; + } + + //clears the BitArrayIntegerSet as if it is new + __forceinline void clear() + { + integers.clear(); + } + + //returns the number of elements that exist in the hash set + __forceinline size_t size() + { + return integers.size(); + } + + //returns one past the maximum index in the container, 0 if empty + __forceinline size_t GetEndInteger() + { + size_t max_position = integers.size(); + if(max_position > 0) + return integers[max_position - 1] + 1; + + return 0; + } + + //reserves the number of elements to be inserted + __forceinline void ReserveNumIntegers(size_t num_elements) + { + integers.reserve(num_elements); + } + + //returns true if the id exists in the set + __forceinline bool contains(size_t id) + { + auto location = std::lower_bound(std::begin(integers), std::end(integers), id); + if(location == std::end(integers)) + return false; + + return id == *location; + } + + //returns true if the id exists in the set + __forceinline bool operator [](size_t id) + { + return contains(id); + } + + //inserts id into hash set, does nothing if id already exists + void insert(size_t id) + { + auto location = std::lower_bound(std::begin(integers), std::end(integers), id); + + //insert as long as it doesn't already exist + if(location == std::end(integers) || id != *location) + integers.emplace(location, id); + } + + 
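Because insert and contains both go through std::lower_bound on the underlying sorted vector, SortedIntegerSet behaves like an ordered set of size_t with no duplicates. A minimal usage sketch; the main() harness, sample values, and include path are illustrative assumptions, not part of the patch.

#include <cassert>
#include "IntegerSet.h" //assumed include path for the header defined in this diff

int main()
{
	SortedIntegerSet s;
	s.insert(10);
	s.insert(3);
	s.insert(10); //duplicate insert is a no-op
	assert(s.size() == 2);
	assert(s.contains(3) && s.contains(10) && !s.contains(7));
	assert(s.GetEndInteger() == 11); //one past the largest stored integer
	return 0;
}
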
//inserts all elements in collection + //assumes that the elements are not in this set, but does not assume elements are sorted + template + __forceinline void insert(Collection &other) + { + for(const size_t element : other) + insert(element); + } + + //inserts all elements in collection + //assumes that the elements are not in this set, but does not assume elements are sorted + //functionally identical to insert + template + __forceinline void InsertInBatch(Collection &other) + { + for(const size_t element : other) + insert(element); + } + + //inserts all elements in collection + //assumes that the elements are not in this set and that the elements are sorted + template + __forceinline void InsertNewSortedIntegers(Collection &other) + { + integers.reserve(other.size()); + for(const size_t element : other) + integers.emplace_back(element); + } + + //insert an id is larger than GetEndInteger() + //assumes that the element is not in this set + __forceinline void InsertNewLargestInteger(size_t id) + { + integers.push_back(id); + } + + //removes id from hash set, does nothing if id does not exist in the hash + void erase(size_t id) + { + auto location = std::lower_bound(std::begin(integers), std::end(integers), id); + if(location == std::end(integers)) + return; + + if(id == *location) + integers.erase(location); + } + + //removes all elements contained by other + template + void erase(Container &other) + { + auto other_iter = std::begin(other); + auto other_end = std::end(other); + + //copies data to the destination instead of erasing + // to reduce computational complexity + size_t dest_index = 0; + size_t cur_index = 0; + while(cur_index != integers.size()) + { + //if other is exhausted, then nothing left to erase, so move everything downward + if(other_iter == other_end) + { + integers.erase(std::begin(integers) + dest_index, std::begin(integers) + cur_index); + return; + } + + //if next integer in other is greater, copy and continue + if(integers[cur_index] < *other_iter) + { + if(dest_index != cur_index) + integers[dest_index] = integers[cur_index]; + + dest_index++; + cur_index++; + } + else //other is less than or equal + { + //if next integer is the same, erase + if(integers[cur_index] == *other_iter) + cur_index++; + + //pre-increment in case it matters for performance for iterator type + ++other_iter; + } + } + } + + //removes all elements contained by other, intended for calling in a batch + //for this container, it is the same as erase + template + inline void EraseInBatch(Container &other) + { + erase(other); + } + + //removes the id and returns true if it was in the id before removal + bool EraseAndRetrieve(size_t id) + { + auto location = std::lower_bound(std::begin(integers), std::end(integers), id); + if(location == std::end(integers)) + return false; + + if(id == *location) + { + integers.erase(location); + return true; + } + + return false; + } + + //does not need to do anything, just conforming to the interface + constexpr void UpdateNumElements() + { } + + //sets this to the set that contains all elements of itself or other + template + void Union(Container &other) + { + auto other_iter = std::begin(other); + auto other_end = std::end(other); + + size_t cur_index = 0; + while(cur_index != integers.size()) + { + //if other is exhausted, then done + if(other_iter == other_end) + return; + + //if next integer in other is greater, just continue + if(integers[cur_index] < *other_iter) + { + cur_index++; + } + else //other is less than or equal + { + //only insert the next 
integer if it is different + if(integers[cur_index] != *other_iter) + integers.insert(std::begin(integers) + cur_index, *other_iter); + + ++other_iter; + } + } + + integers.insert(std::end(integers), other_iter, other_end); + } + + //sets this to the set that contains only elements that it and another jointly contain + template + void Intersect(Container &other) + { + auto other_iter = std::begin(other); + auto other_end = std::end(other); + + //copies data to the destination instead of erasing + // to reduce computational complexity + size_t dest_index = 0; + size_t cur_index = 0; + while(cur_index != integers.size() && other_iter != other_end) + { + //if next integer is greater, skip + if(integers[cur_index] < *other_iter) + { + cur_index++; + } + else //other is less than or equal + { + //if next integer is the same, copy and keep + if(integers[cur_index] == *other_iter) + { + if(dest_index != cur_index) + integers[dest_index] = integers[cur_index]; + + dest_index++; + cur_index++; + } + + ++other_iter; + } + } + + //cut off anything at the end + integers.resize(dest_index); + } + + //returns the first offset of the vector returned by GetIntegerVector that is + // greater than id; will return the size of the integers vector if id is larger + // than all in it + __forceinline size_t GetFirstIntegerVectorLocationGreaterThan(size_t id) + { + auto location = std::upper_bound(std::begin(integers), std::end(integers), id); + return std::distance(std::begin(integers), location); + } + + constexpr std::vector &GetIntegerVector() + { + return integers; + } + +protected: + std::vector integers; +}; + +//uses bit-compression to hash integral key values into a set +// note that some of the methods follow the STL convention so that +// this library can be closer to a drop-in replacement for STL sets +class BitArrayIntegerSet +{ +public: + //defined to keep compatibility with stl containers + using value_type = size_t; + + BitArrayIntegerSet() + { + numElements = 0; + curMaxNumIndices = 0; + } + + struct Iterator + { + constexpr Iterator() + : bucket(0), bit(0), hash(nullptr) + { } + + constexpr Iterator(BitArrayIntegerSet *_hash, size_t _bucket, size_t _bit) + : bucket(_bucket), bit(_bit), hash(_hash) + { } + + constexpr Iterator operator =(const Iterator &other) + { + bucket = other.bucket; + bit = other.bit; + hash = other.hash; + return *this; + } + + constexpr bool operator ==(const Iterator &other) + { + return (bucket == other.bucket && bit == other.bit); + } + + constexpr bool operator !=(const Iterator &other) + { + return (bucket != other.bucket || bit != other.bit); + } + + __forceinline Iterator &operator ++() + { + hash->FindNext(bucket, bit); + return *this; + } + + //dereference operator + constexpr size_t operator *() + { + return hash->GetIndexFromBucketAndBit(bucket, bit); + } + + //bucket index + size_t bucket; + + //bit index + size_t bit; + + //associated set + BitArrayIntegerSet *hash; + }; + + //assignment operator, deep copies bit buffer + inline void operator =(const BitArrayIntegerSet &other) + { + numElements = other.numElements; + curMaxNumIndices = other.curMaxNumIndices; + bitBucket = other.bitBucket; + } + + //std begin (must be lowercase) + inline Iterator begin() + { + size_t bucket = 0; + size_t bit = 0; + FindFirst(bucket, bit); + return Iterator(this, bucket, bit); + } + + //std end (must be lowercase) + //returns bit 0 of the lowest bucket that is not populated + __forceinline Iterator end() + { + //get the last bucket with the 0th bit + size_t bucket = 
bitBucket.size(); + size_t bit = 0; + + return Iterator(this, bucket, bit); + } + + //iterates over all of the integers as efficiently as possible, passing them into func + template + inline void IterateOver(IntegerFunction func, size_t up_to_index = std::numeric_limits::max()) + { + size_t end_integer = GetEndInteger(); + size_t num_buckets = (end_integer + 63) / 64; + size_t num_indices = size(); + size_t end_index = std::min(up_to_index, end_integer); + + //if dense, loop over, assuming likely to hit + if(num_indices / num_buckets > 32) + { + for(size_t index = 0; index < end_index; index++) + { + if(ContainsWithoutMaximumIndexCheck(index)) + func(index); + } + } + else //use the iterator, which is more efficient when sparse + { + auto iter = begin(); + size_t index = *iter; + while(index < end_index) + { + func(index); + ++iter; + index = *iter; + } + } + } + + //sets bucket and bit to the values pointing to the first id in the hash, + // or the first element if it is empty + inline void FindFirst(size_t &bucket, size_t &bit) + { + bucket = 0; + bit = 0; + + if(bitBucket.size() == 0) + return; + + //if the first isn't set, then find the next + if(!(bitBucket[0] & 1)) + FindNext(bucket, bit); + } + + //returns the first id in the hash, or size_t::max if there are no ids in the hash + size_t First() + { + size_t bucket = 0; + size_t bit = 0; + FindFirst(bucket, bit); + return GetIndexFromBucketAndBit(bucket, bit); + } + + //sets bucket and bit to the values pointing to the next id in the hash + // assumes that bucket and bit point to a valid index + //if there are no more ids, then it will return bit 0 of the lowest bucket that is not populated + void FindNext(size_t &bucket, size_t &bit) + { + bit++; + + //optimized early exit for dense arrays + if(bit < numBitsPerBucket && (bitBucket[bucket] & (1ULL << bit))) + return; + + //move on to next bucket if out of bits + if(bit == numBitsPerBucket || bitBucket[bucket] < (1ULL << bit)) + { + bit = 0; + bucket++; + + //exit out if no more buckets + if(bucket == bitBucket.size()) + return; + } + + //there's leftover bits set, find the next + if(bitBucket[bucket] > 0) + { + while((bitBucket[bucket] & (1ULL << bit)) == 0) + bit++; + return; + } + + //empty bucket, skip until find non-empty or run out of buckets + do + { + bucket++; + if(bucket == bitBucket.size()) + return; + } while(bitBucket[bucket] == 0); + + //if made it here, then there is nonzero value in bitBucket[bucket] + bit = Platform_FindFirstBitSet(bitBucket[bucket]); + } + + //returns the next id in the hash + //if there are no more ids, then it will return bit 0 of the lowest bucket that is not populated + inline size_t Next(size_t id) + { + size_t bucket = GetBucket(id); + size_t bit = GetBit(id); + FindNext(bucket, bit); + return GetIndexFromBucketAndBit(bucket, bit); + } + + //returns the nth id in the set by sorted order + size_t GetNthElement(size_t n) + { + //if asking for something too big, just return last element (size) + if(n > numElements) + return GetEndInteger(); + + //fast forward using poopulation count to find the bucket + size_t iteration = 0; + size_t bucket = 0; + for(; bucket < bitBucket.size(); bucket++) + { + size_t bucket_count = __popcnt64(bitBucket[bucket]); + //look for where the count exceeds n because the bit hasn't been found yet (e.g., bit 0 is found by the first count of 1) + if(iteration + bucket_count > n) + break; + + iteration += bucket_count; + } + + //start iterating from the bucket where the bit was found + // want to enter the loop at least 
once, even if iteration == n and have increments at the end, + // so a for loop doesn't quite work and neither does a do-while, hence the while(true) + size_t bit = 0; + while(true) + { + //find next bit + while((bitBucket[bucket] & (1ULL << bit)) == 0) + bit++; + + if(iteration == n) + break; + + //move on to subsequent iteration and bit + iteration++; + bit++; + } + + return GetIndexFromBucketAndBit(bucket, bit); + } + + //does not uniformly get an element, first selects a bucket at random, then selects an element in the bucket at random + size_t GetRandomElement(RandomStream &random_stream) + { + //if there are significatly less elements than the set size, use the iterative method to select a uniformly random element + if(curMaxNumIndices / 4 > numElements) + return GetNthElement(random_stream.RandSize(numElements)); + + //pick a bucket at random as long as it has some data in it (isn't 0) + size_t bucket_index = random_stream.RandSize(bitBucket.size()); + while(bitBucket[bucket_index] == 0) + bucket_index = random_stream.RandSize(bitBucket.size()); + + size_t out = GetIndexFromBucketAndBit(bucket_index, 0); + + int rand_limit = numBitsPerBucket; + + //use a rough fast approximation for finding the largest bit set for the value in the bucket without having to loop over all its bits or having to take logs + //break up smaller values into 4 categories so that we don't need to loop for a long time to find a small random number out of 64 + //for example any value under 128 only has a maximum of 7 bits set, so we'd do a randomStream.Rand() * 16 in that case + if(bitBucket[bucket_index] < 65536) //any values with a max bit of 16 + rand_limit = 16; + else if(bitBucket[bucket_index] < 4294967296) //any values with a max bit of 32 + rand_limit = 32; + else if(bitBucket[bucket_index] < 281474976710656) //any values with a max bit of 48 + rand_limit = 48; + + //pick out a set bit in the bucket at random + size_t bit = random_stream.RandSize(rand_limit); + while((bitBucket[bucket_index] & (1ULL << bit)) == 0) + bit = random_stream.RandSize(rand_limit); + + //output the index of the set bit + return out + bit; + } + + //clears the BitArrayIntegerSet as if it is new + __forceinline void clear() + { + bitBucket.clear(); + curMaxNumIndices = 0; + numElements = 0; + } + + //returns the number of elements that exist in the hash set + constexpr size_t size() + { + return numElements; + } + + //resizes to best fit num_ids, updates curMaxNumIndices + // will set all values as present or not based on fill_value + __forceinline void resize(size_t num_ids, bool fill_value = false) + { + //num_ids is 1-based, need to get the bucket for 0-based, + // then get the size, which adds 1 to the bucket + size_t total_num_buckets = GetBucket(num_ids - 1) + 1; + bitBucket.resize(total_num_buckets, fill_value ? 
0xFFFFFFFFFFFFFFFFULL : 0); + curMaxNumIndices = total_num_buckets * numBitsPerBucket; + } + + //reserves space such that num_ids ranging from 0..num_ids-1 could then be directly placed into the hash + // implements stl standard function + inline void ReserveNumIntegers(size_t num_ids) + { + //this will catch num_ids = 0, since curMaxNumIndices can't be less than 0 + if(num_ids <= curMaxNumIndices) + return; + + resize(num_ids); + } + + //returns one past the maximum index in the container, 0 if empty + constexpr size_t GetEndInteger() + { + if(numElements == 0) + return 0; + + size_t bucket = bitBucket.size() - 1; + while(bucket > 0 && bitBucket[bucket] == 0) + bucket--; + + if(bitBucket[bucket] == 0) + return 0; + + //return 1 past the max index + return numBitsPerBucket * bucket + Platform_FindLastBitSet(bitBucket[bucket]) + 1; + } + + //returns true if the id exists in the set + __forceinline bool contains(size_t id) + { + if(id >= curMaxNumIndices) + return false; + + uint64_t bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + + return bucket & mask; + } + + //returns true if the id exists in the set + // but does not check to see if the id is beyond the range + __forceinline bool ContainsWithoutMaximumIndexCheck(size_t id) + { + uint64_t bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + + return bucket & mask; + } + + //returns true if the id exists in the set + __forceinline bool operator [](size_t id) + { + return contains(id); + } + + //sets all up_to_id integers to true/exist + void SetAllIds(size_t up_to_id) + { + if(up_to_id == 0) + { + clear(); + return; + } + + resize(up_to_id, true); + + //set the last field if applicable + if(up_to_id % numBitsPerBucket != 0) + { + size_t last_id = up_to_id - 1; + size_t last_bucket = GetBucket(last_id); + size_t first_unused_bit = GetBit(up_to_id); + size_t last_bucket_value = 0xFFFFFFFFFFFFFFFFULL >> (numBitsPerBucket - first_unused_bit); + bitBucket[last_bucket] = last_bucket_value; + } + + numElements = up_to_id; + } + + //inserts id into hash set, does nothing if id already exists in the hash + inline void insert(size_t id) + { + ReserveNumIntegers(id + 1); + + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + if((bucket & mask) == 0) + { + //set bit to 1 + bucket |= mask; + numElements++; + } + } + + //inserts all elements in collection + template + __forceinline void insert(Collection &other) + { + for(const size_t element : other) + insert(element); + + UpdateNumElements(); + } + + //inserts all elements in sis + inline void InsertInBatch(SortedIntegerSet &sis) + { + if(sis.size() == 0) + return; + + ReserveNumIntegers(sis.GetEndInteger()); + + //if there are elements, need to check if overwriting for keeping numElements updated + if(numElements > 0) + { + for(auto id : sis) + { + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + if((bucket & mask) == 0) + { + //set bit to 1 + bucket |= mask; + numElements++; + } + } + } + else //can just insert and count + { + for(auto id : sis) + { + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + + //set bit to 1 + bucket |= mask; + numElements++; + } + } + } + + //inserts all elements from other + __forceinline void InsertInBatch(BitArrayIntegerSet &other) + { + Union(other); + } + + //inserts all elements in collection + template + __forceinline void InsertInBatch(Collection &other) + { + for(const size_t element : other) + 
insert(element); + } + + //insert an id is larger than or equal to GetEndInteger() + __forceinline void InsertNewLargestInteger(size_t id) + { + insert(id); + } + + //removes id from hash set, does nothing if id does not exist in the hash + inline void erase(size_t id) + { + if(id >= curMaxNumIndices) + return; + + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + + //if nothing in the bucket, return early + if((bucket & mask) == 0) + return; + + //set bit to 0 + bucket &= ~mask; + numElements--; + + TrimBack(); + } + + //Sets this to the BitArrayIntegerSet to the set that contains only elements that it contains that other does not contain + // does NOT update the number of elements, so UpdateNumElements must be called + void EraseInBatch(BitArrayIntegerSet &other) + { + size_t max_index = std::min(curMaxNumIndices, other.curMaxNumIndices); + if(max_index == 0) + return; + + size_t max_bucket = GetBucket(max_index - 1); + + //perform intersection + for(size_t i = 0; i <= max_bucket; i++) + bitBucket[i] &= ~(other.bitBucket[i]); + + TrimBack(); + } + + //erases all elements in collection + template + __forceinline void EraseInBatch(Collection &collection) + { + for(const size_t id : collection) + { + if(id >= curMaxNumIndices) + continue; + + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + if((bucket & mask) != 0) + { + //set bit to 0 + bucket &= ~mask; + numElements--; + } + } + + TrimBack(); + } + + //removes all elements contained by other + void erase(BitArrayIntegerSet &other) + { + EraseInBatch(other); + UpdateNumElements(); + } + + //erases all elements in collection + template + __forceinline void erase(Collection &other) + { + for(auto i : other) + erase(i); + + TrimBack(); + UpdateNumElements(); + } + + //removes the id and returns true if it was in the id before removal + bool EraseAndRetrieve(size_t id) + { + if(id >= curMaxNumIndices) + return false; + + uint64_t &bucket = bitBucket[GetBucket(id)]; + uint64_t mask = (1ULL << GetBit(id)); + + //if nothing in the bucket, return early + if((bucket & mask) == 0) + return false; + + //set bit to 0 + bucket &= ~mask; + numElements--; + + TrimBack(); + + return true; + } + + //if id_from is present, it will "rename" it to id_to + void ChangeIdIfPresent(size_t id_from, size_t id_to) + { + if(id_from >= curMaxNumIndices) + return; + + uint64_t &bucket_from = bitBucket[GetBucket(id_from)]; + uint64_t mask_from = (1ULL << GetBit(id_from)); + + //if the id isn't present, conclude + if(!(bucket_from & mask_from)) + return; + + //remove id_from + if((bucket_from & mask_from) != 0) + { + //set bit to 0 + bucket_from &= ~mask_from; + numElements--; + } + + insert(id_to); + TrimBack(); + } + + //recomputes the number of inserted elements (may be necessary if doing parallel insertion operations like bit merging in union or intersect) + // must be called if a Batch operation is used + __forceinline void UpdateNumElements() + { + //update num elements + numElements = 0; + for(const auto &bucket : bitBucket) + numElements += __popcnt64(bucket); + } + + //trims off trailing empty buckets + __forceinline void TrimBack() + { + //always want to leave one bucket left + while(bitBucket.size() > 1 && bitBucket.back() == 0) + { + bitBucket.pop_back(); + curMaxNumIndices -= numBitsPerBucket; + } + } + + //Sets this to the BitArrayIntegerSet to the set that contains all elements of itself or other + void Union(BitArrayIntegerSet &other) + { + //skip if empty + 
if(other.curMaxNumIndices == 0) + return; + + //make sure it can hold all of the other + ReserveNumIntegers(other.curMaxNumIndices); + + //perform union + for(size_t i = 0; i < other.bitBucket.size(); i++) + bitBucket[i] |= other.bitBucket[i]; + + UpdateNumElements(); + } + + //Sets this to the BitArrayIntegerSet to the set that contains only elements that it and another jointly contain + void Intersect(BitArrayIntegerSet &other) + { + //if no intersection, then just clear and exit + if(numElements == 0 || other.numElements == 0) + { + clear(); + return; + } + + size_t this_bucket_end = bitBucket.size(); + size_t other_bucket_end = other.bitBucket.size(); + + //perform intersection on overlap + for(size_t i = 0; i < this_bucket_end && i < other_bucket_end; i++) + bitBucket[i] &= other.bitBucket[i]; + + //clear buckets after the other + for(size_t i = other_bucket_end; i < this_bucket_end; i++) + bitBucket[i] = 0; + + TrimBack(); + UpdateNumElements(); + } + + //Sets this to the BitArrayIntegerSet to the set that contains only elements that it and sis jointly contain + // does NOT update the number of elements, so UpdateNumElements must be called + void IntersectInBatch(SortedIntegerSet &sis) + { + if(numElements == 0) + return; + + if(sis.size() == 0) + { + clear(); + return; + } + + //remove elements off the top first for efficiency + size_t sis_end_index = sis.GetEndInteger(); + resize(sis_end_index); + size_t num_buckets = bitBucket.size(); + + //intersect + size_t cur_id = 0; + size_t cur_bucket = 0; + for(auto other_id : sis) + { + size_t other_id_bucket = GetBucket(other_id); + //if next id is beyond last bucket, then just truncate + if(other_id_bucket >= num_buckets) + { + bitBucket.resize(cur_bucket + 1); + break; + } + + //any buckets that need to be skipped should be zeroed out + if(other_id_bucket > cur_bucket) + { + //if there are any bits left in the last bucket after the last cur_id, clear them + size_t first_empty_bit = GetBit(cur_id); + if(first_empty_bit > 0) + { + size_t last_bucket_bitmask = (0xFFFFFFFFFFFFFFFFULL >> (numBitsPerBucket - first_empty_bit)); + bitBucket[cur_bucket] &= last_bucket_bitmask; + } + //set cur_id to the next id past the bucket + cur_bucket = GetBucket(cur_id + numBitsPerBucket - 1); + cur_id = numBitsPerBucket * cur_bucket; + + //zero out buckets skipped over + cur_id += numBitsPerBucket * (other_id_bucket - cur_bucket); + for(; cur_bucket < other_id_bucket; cur_bucket++) + bitBucket[cur_bucket] = 0; + } + + //zero out everything until the other id + auto &bucket_value = bitBucket[cur_bucket]; + for(; cur_id < other_id; cur_id++) + bucket_value &= ~(1ULL << GetBit(cur_id)); + + //cur_id and other_id are in both sets, so don't remove it + cur_id++; + cur_bucket = GetBucket(cur_id); + } + + //if there are any bits left in the last bucket after the last cur_id, clear them + if(cur_bucket < bitBucket.size()) + { + size_t first_empty_bit = GetBit(cur_id); + if(first_empty_bit > 0) + { + size_t last_bucket_bitmask = (0xFFFFFFFFFFFFFFFFULL >> (numBitsPerBucket - first_empty_bit)); + bitBucket[cur_bucket] &= last_bucket_bitmask; + } + } + + curMaxNumIndices = (bitBucket.size() * numBitsPerBucket); + TrimBack(); + } + + //Sets this to the BitArrayIntegerSet to the set that contains only elements that it and sis jointly contain + __forceinline void Intersect(SortedIntegerSet &sis) + { + IntersectInBatch(sis); + UpdateNumElements(); + } + + //flips the elements in the set starting with element 0 up to but not including up_to_id + // resetting the size of 
the container + void Not(size_t up_to_id) + { + if(up_to_id == 0) + { + clear(); + return; + } + + resize(up_to_id); + + //flip buckets up to the last bucket + size_t num_buckets = bitBucket.size(); + for(size_t i = 0; i < num_buckets; i++) + bitBucket[i] = ~bitBucket[i]; + + //clear any remaining bits in the last bucket + size_t up_to_bit = GetBit(up_to_id); + if(up_to_bit > 0) + { + size_t last_bucket_bitmask = (0xFFFFFFFFFFFFFFFFULL >> (numBitsPerBucket - up_to_bit)); + size_t last_bucket = num_buckets - 1; + bitBucket[last_bucket] &= last_bucket_bitmask; + } + + TrimBack(); + UpdateNumElements(); + } + + //sets elements to the flip of the elements in other up to but not including up_to_id + // up_to_id must be at least as large as the max index of other + void Not(BitArrayIntegerSet &other, size_t up_to_id) + { + if(up_to_id == 0) + { + clear(); + return; + } + + resize(up_to_id); + + //flip buckets up to the last other bucket + size_t num_other_buckets = other.bitBucket.size(); + for(size_t i = 0; i < num_other_buckets; i++) + bitBucket[i] = ~other.bitBucket[i]; + + //fill in any past the other's max + size_t num_buckets = bitBucket.size(); + for(size_t i = num_other_buckets; i < num_buckets; i++) + bitBucket[i] = 0xFFFFFFFFFFFFFFFFULL; + + //clear any remaining bits in the last bucket + size_t up_to_bit = GetBit(up_to_id); + if(up_to_bit > 0) + { + size_t last_bucket_bitmask = (0xFFFFFFFFFFFFFFFFULL >> (numBitsPerBucket - up_to_bit)); + size_t last_bucket = num_buckets - 1; + bitBucket[last_bucket] &= last_bucket_bitmask; + } + + TrimBack(); + UpdateNumElements(); + } + + //bits per bucket given uint64_t + static constexpr size_t numBitsPerBucket = 64; + +protected: + + //gets the bucket index for a given id + constexpr size_t GetBucket(size_t id) + { + return id / numBitsPerBucket; + } + + //gets the bit index for a given id + constexpr size_t GetBit(size_t id) + { + return id % numBitsPerBucket; + } + + constexpr size_t GetIndexFromBucketAndBit(size_t bucket, size_t bit) + { + return (bucket * numBitsPerBucket) + bit; + } + + //num elements that exist as inserted in the hash + size_t numElements; + + //maximum possible index for the given number of data buckets + size_t curMaxNumIndices; + + //buffer of bit buckets + std::vector bitBucket; +}; + +class EfficientIntegerSet +{ +public: + //defined to keep compatibility with stl containers + using value_type = size_t; + + EfficientIntegerSet() + : isSisContainer(true) + { } + + //assignment operator, deep copies + inline void operator =(const EfficientIntegerSet &other) + { + isSisContainer = other.isSisContainer; + + if(other.isSisContainer) + sisContainer = other.sisContainer; + else + baisContainer = other.baisContainer; + } + + //assignment operator, deep copies bit buffer + inline void operator =(const SortedIntegerSet &other) + { + baisContainer.clear(); + isSisContainer = true; + sisContainer = other; + } + + //assignment operator, deep copies bit buffer + inline void operator =(const BitArrayIntegerSet &other) + { + sisContainer.clear(); + isSisContainer = false; + baisContainer = other; + } + + //copies the data to other + inline void CopyTo(BitArrayIntegerSet &other) + { + if(isSisContainer) + { + other.clear(); + other.insert(sisContainer); + } + else + other = baisContainer; + } + + struct Iterator + { + inline Iterator(const Iterator &other) + { + isSisContainer = other.isSisContainer; + + if(other.isSisContainer) + sisIterator = other.sisIterator; + else + baisIterator = other.baisIterator; + } + + inline 
Iterator(SortedIntegerSet::Iterator _iterator) + { + sisIterator = _iterator; + isSisContainer = true; + } + + inline Iterator(BitArrayIntegerSet::Iterator _iterator) + { + baisIterator = _iterator; + isSisContainer = false; + } + + ~Iterator() + { } + + inline Iterator operator =(const Iterator &other) + { + isSisContainer = other.isSisContainer; + + if(other.isSisContainer) + sisIterator = other.sisIterator; + else + baisIterator = other.baisIterator; + + return *this; + } + + constexpr bool operator ==(const Iterator &other) + { + if(isSisContainer) + return (sisIterator == other.sisIterator); + else + return (baisIterator == other.baisIterator); + } + + constexpr bool operator !=(const Iterator &other) + { + if(isSisContainer) + return (sisIterator != other.sisIterator); + else + return (baisIterator != other.baisIterator); + } + + __forceinline Iterator &operator ++() + { + if(isSisContainer) + ++sisIterator; + else + ++baisIterator; + + return *this; + } + + //dereference operator + constexpr size_t operator *() + { + if(isSisContainer) + return *sisIterator; + else + return *baisIterator; + } + + SortedIntegerSet::Iterator sisIterator; + BitArrayIntegerSet::Iterator baisIterator; + + bool isSisContainer; + }; + + //std begin (must be lowercase) + __forceinline auto begin() + { + if(isSisContainer) + return Iterator(sisContainer.begin()); + else + return Iterator(baisContainer.begin()); + } + + //std end (must be lowercase) + __forceinline auto end() + { + if(isSisContainer) + return Iterator(sisContainer.end()); + else + return Iterator(baisContainer.end()); + } + + //iterates over all elements in the container, passing in the value to func + //this is intended for fast operations performed at volume, where even small bits + //of extra logic in the iterator would affect performance + template + __forceinline void IterateFunctionOverElements(ElementFunc func) + { + if(isSisContainer) + { + for(auto element : sisContainer) + func(element); + } + else + { + for(auto element : baisContainer) + func(element); + } + } + + //returns the nth id in the set by sorted order + inline size_t GetNthElement(size_t n) + { + if(isSisContainer) + return sisContainer.GetNthElement(n); + else + return baisContainer.GetNthElement(n); + } + + //gets a random element in a performant way + // note that if it is a bais container, it will not necessarily obtain elements with uniform probability + inline size_t GetRandomElement(RandomStream &random_stream) + { + if(isSisContainer) + return sisContainer.GetRandomElement(random_stream); + else + return baisContainer.GetRandomElement(random_stream); + } + + //clears the container as if it is new + inline void clear() + { + if(isSisContainer) + sisContainer.clear(); + else + baisContainer.clear(); + } + + //returns the number of elements that exist + __forceinline size_t size() + { + if(isSisContainer) + return sisContainer.size(); + else + return baisContainer.size(); + } + + //reserves the number of elements to be inserted + __forceinline void ReserveNumIntegers(size_t num_elements) + { + if(isSisContainer) + sisContainer.ReserveNumIntegers(num_elements); + else + baisContainer.ReserveNumIntegers(num_elements); + } + + //returns one past the maximum index in the container, 0 if empty + inline size_t GetEndInteger() + { + if(isSisContainer) + return sisContainer.GetEndInteger(); + else + return baisContainer.GetEndInteger(); + } + + //returns true if the id exists in the set + inline bool contains(size_t id) + { + if(isSisContainer) + return 
sisContainer.contains(id); + else + return baisContainer.contains(id); + } + + //returns true if the id exists in the set + inline bool operator [](size_t id) + { + if(isSisContainer) + return sisContainer.contains(id); + else + return baisContainer.contains(id); + } + + //sets all up_to_id integers to true/exist + void SetAllIds(size_t up_to_id) + { + if(isSisContainer) + ConvertSisToBais(); + + baisContainer.SetAllIds(up_to_id); + } + + //inserts id into set, does nothing if id already exists + void insert(size_t id) + { + if(isSisContainer) + { + sisContainer.insert(id); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.insert(id); + ConvertBaisToSisIfBetter(); + } + } + + //inserts all elements from other + __forceinline void InsertInBatch(EfficientIntegerSet &other) + { + if(other.isSisContainer) + { + if(isSisContainer) + sisContainer.InsertInBatch(other.sisContainer); + else + baisContainer.InsertInBatch(other.sisContainer); + } + else + { + if(isSisContainer) + sisContainer.InsertInBatch(other.baisContainer); + else + baisContainer.InsertInBatch(other.baisContainer); + } + } + + //inserts all elements in collection + template + __forceinline void InsertInBatch(Collection &other) + { + if(isSisContainer) + sisContainer.InsertInBatch(other); + else + baisContainer.InsertInBatch(other); + } + + //quickly inserts an id + // it assumes that the id is larger than GetEndInteger() + inline void InsertNewLargestInteger(size_t id) + { + if(isSisContainer) + { + sisContainer.InsertNewLargestInteger(id); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.insert(id); + ConvertBaisToSisIfBetter(); + } + } + + //removes id from hash set, does nothing if id does not exist in the hash + void erase(size_t id) + { + if(isSisContainer) + { + sisContainer.erase(id); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.erase(id); + ConvertBaisToSisIfBetter(); + } + } + + //removes all elements contained by other + void erase(EfficientIntegerSet &other) + { + if(isSisContainer) + { + sisContainer.erase(other); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.erase(other); + ConvertBaisToSisIfBetter(); + } + } + + //removs all elements of this container from other + void EraseTo(BitArrayIntegerSet &other) + { + if(isSisContainer) + other.erase(sisContainer); + else + other.erase(baisContainer); + } + + //removes all elements contained by other, intended for calling in a batch + template + inline void EraseInBatch(Container &other) + { + if(isSisContainer) + { + sisContainer.EraseInBatch(other); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.EraseInBatch(other); + ConvertBaisToSisIfBetter(); + } + } + + //removes all elements from other in this container, intended for calling in a batch + void EraseInBatchFrom(BitArrayIntegerSet &other) + { + if(isSisContainer) + other.EraseInBatch(sisContainer); + else + other.EraseInBatch(baisContainer); + } + + //removes all elements contained by other, intended for calling in a batch + inline void EraseInBatch(EfficientIntegerSet &other) + { + if(isSisContainer) + { + if(other.isSisContainer) + sisContainer.EraseInBatch(other.sisContainer); + else + sisContainer.EraseInBatch(other.baisContainer); + + ConvertSisToBaisIfBetter(); + } + else + { + if(other.isSisContainer) + baisContainer.EraseInBatch(other.sisContainer); + else + baisContainer.EraseInBatch(other.baisContainer); + + ConvertBaisToSisIfBetter(); + } + } + + //removes the id and returns true if it was in the id before removal + inline bool 
EraseAndRetrieve(size_t id) + { + if(isSisContainer) + { + if(sisContainer.EraseAndRetrieve(id)) + { + ConvertSisToBaisIfBetter(); + return true; + } + } + else + { + if(baisContainer.EraseAndRetrieve(id)) + { + ConvertBaisToSisIfBetter(); + return true; + } + } + + return false; + } + + //updates the number of elements + void UpdateNumElements() + { + if(isSisContainer) + { + sisContainer.UpdateNumElements(); + ConvertSisToBaisIfBetter(); + } + else + { + baisContainer.UpdateNumElements(); + ConvertBaisToSisIfBetter(); + } + } + + //sets this to the set that contains all elements of itself or other + void Union(EfficientIntegerSet &other) + { + //see if should convert to bais before merging to speed things up + if(isSisContainer) + { + size_t lower_bound_num_elements = std::max(sisContainer.size(), other.size()); + size_t lower_bound_max_size = std::max(sisContainer.GetEndInteger(), other.GetEndInteger()); + if(IsBaisPreferredToSis(lower_bound_num_elements, lower_bound_max_size)) + ConvertSisToBais(); + } + + if(isSisContainer) + { + if(other.isSisContainer) + sisContainer.insert(other.sisContainer); + else + sisContainer.insert(other.baisContainer); + + ConvertSisToBaisIfBetter(); + } + else + { + if(other.isSisContainer) + baisContainer.insert(other.sisContainer); + else + baisContainer.Union(other.baisContainer); + + ConvertBaisToSisIfBetter(); + } + } + + //sets other to the set that contains all elements of itself or other + inline void UnionTo(BitArrayIntegerSet &other) + { + if(IsSisContainer()) + other.insert(sisContainer); + else + other.Union(baisContainer); + } + + //sets this to the set that contains only elements that it and other jointly contain + void Intersect(EfficientIntegerSet &other) + { + //see if should convert to sis before merging to speed things up + if(!isSisContainer) + { + size_t upper_bound_num_elements = std::min(sisContainer.size(), other.size()); + size_t upper_bound_max_size = std::min(sisContainer.GetEndInteger(), other.GetEndInteger()); + if(IsSisPreferredToBais(upper_bound_num_elements, upper_bound_max_size)) + ConvertBaisToSis(); + } + + if(isSisContainer) + { + if(other.isSisContainer) + sisContainer.Intersect(other.sisContainer); + else + sisContainer.Intersect(other.baisContainer); + + ConvertSisToBaisIfBetter(); + } + else + { + if(other.isSisContainer) + baisContainer.Intersect(other.sisContainer); + else + baisContainer.Intersect(other.baisContainer); + + ConvertBaisToSisIfBetter(); + } + } + + //sets other to the set that contains only elements that it and other jointly contain + inline void IntersectTo(BitArrayIntegerSet &other) + { + if(IsSisContainer()) + other.Intersect(sisContainer); + else + other.Intersect(baisContainer); + } + + //flips the elements in the set starting with element 0 up to but not including up_to_id + // resetting the size of the container + void Not(size_t up_to_id) + { + if(isSisContainer) + { + //if it was a sisContainer, then it was sparse, so convert to baisContainer + //set all and remove those from sisContainer + baisContainer.SetAllIds(up_to_id); + baisContainer.erase(sisContainer); + sisContainer.clear(); + isSisContainer = false; + } + else + { + baisContainer.Not(up_to_id); + ConvertBaisToSisIfBetter(); + } + } + + //sets elements to the flip of the elements in other up to but not including up_to_id + // up_to_id must be at least as large as the max index of other + template + void Not(Container &other, size_t up_to_id) + { + clear(); + isSisContainer = false; + + if(other.isSisContainer) + { + //if it was a 
sisContainer, then it was sparse, so convert to baisContainer + //set all and remove those from sisContainer + baisContainer.SetAllIds(up_to_id); + baisContainer.erase(other.sisContainer); + } + else + { + baisContainer.Not(other.baisContainer, up_to_id); + ConvertBaisToSisIfBetter(); + } + } + + //sets other's elements to the flip of the elements up to but not including up_to_id + // up_to_id must be at least as large as the max index of other + void NotTo(BitArrayIntegerSet &other, size_t up_to_id) + { + if(isSisContainer) + { + other.SetAllIds(up_to_id); + other.erase(sisContainer); + } + else + { + other.Not(baisContainer, up_to_id); + } + } + + //functions for specialized use + + constexpr bool IsSisContainer() + { + return isSisContainer; + } + + constexpr bool IsBaisContainer() + { + return !isSisContainer; + } + + constexpr auto &GetSisContainer() + { + return sisContainer; + } + + constexpr auto &GetBaisContainer() + { + return baisContainer; + } + +protected: + + //returns true if it would be more efficient to convert from sis to bais + //assumes conitainer is already sis + inline bool IsBaisPreferredToSis(size_t num_elements, size_t max_element) + { + //add 1 to round up to make it less likely to flip back and forth between types + size_t num_bais_elements_required = ((max_element + BitArrayIntegerSet::numBitsPerBucket - 1) / BitArrayIntegerSet::numBitsPerBucket) + 1; + //use a heuristic of 2 values per bais bucket, since some operations are faster when can just iterate over a list + return (num_elements > 2 * num_bais_elements_required); + } + + //returns true if it would be more efficient to convert from bais to sis + //assumes conitainer is already bais + inline bool IsSisPreferredToBais(size_t num_elements, size_t max_element) + { + //round this down (don't take ceil) to make it less likely to flip back and forth between types + size_t num_bais_elements_required = (max_element + BitArrayIntegerSet::numBitsPerBucket - 1) / BitArrayIntegerSet::numBitsPerBucket; + //use a heuristic of 2 values per bais bucket, since some operations are faster when can just iterate over a list + return (2 * num_bais_elements_required > num_elements); + } + + //converts data storage to bais; assumes it is already sis + inline void ConvertSisToBais() + { + baisContainer.InsertInBatch(sisContainer); + sisContainer.clear(); + isSisContainer = false; + } + + //converts data storage to sis; assumes it is already bais + inline void ConvertBaisToSis() + { + sisContainer.InsertNewSortedIntegers(baisContainer); + baisContainer.clear(); + isSisContainer = true; + } + + //automatically converts Sis to Bais when better + //assumes isSisContainer is true + __forceinline void ConvertSisToBaisIfBetter() + { + if(IsBaisPreferredToSis(sisContainer.size(), sisContainer.GetEndInteger())) + ConvertSisToBais(); + } + + //automatically converts Bais to Sis when better + //assumes isSisContainer is false + __forceinline void ConvertBaisToSisIfBetter() + { + if(IsSisPreferredToBais(baisContainer.size(), baisContainer.GetEndInteger())) + ConvertBaisToSis(); + } + + //if true, use sisContainer, if false use baisContainer + bool isSisContainer; + + //keep both container types + SortedIntegerSet sisContainer; + BitArrayIntegerSet baisContainer; +}; diff --git a/src/Amalgam/KnnCache.h b/src/Amalgam/KnnCache.h new file mode 100644 index 00000000..b4b75b0f --- /dev/null +++ b/src/Amalgam/KnnCache.h @@ -0,0 +1,175 @@ +#pragma once + +//project headers: +#include "Concurrency.h" +#include "SeparableBoxFilterDataStore.h" + 
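+
+// A minimal sketch of the density heuristic used by the integer set containers above:
+// EfficientIntegerSet treats roughly two elements per 64-bit bucket as the crossover
+// between its sorted-vector (SIS) and bit-array (BAIS) representations. PreferBitArray
+// is a hypothetical standalone helper shown only for illustration, not part of this codebase:
+//
+//   bool PreferBitArray(size_t num_elements, size_t max_element)
+//   {
+//       // buckets needed to cover ids 0..max_element, rounded up, plus one for hysteresis
+//       size_t buckets_needed = ((max_element + 63) / 64) + 1;
+//       // dense enough that a bit array beats iterating a sorted vector of indices
+//       return num_elements > 2 * buckets_needed;
+//   }
+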
+//system headers: +#include + +//caches nearest neighbor results for every entity in the provided data structure +// will attempt to find nonzero distances whenever possible and will expand the search out as far as it can in its attempt +class KnnNonZeroDistanceQuerySBFCache +{ +public: + KnnNonZeroDistanceQuerySBFCache() + { + sbfDataStore = nullptr; + } + + //clears all buffers and resizes and resets them based on the datastore of entities and the particular + // relevant_indices to use from the datastore + void ResetCache(SeparableBoxFilterDataStore &datastore, BitArrayIntegerSet &relevant_indices, + GeneralizedDistance &dist_params, std::vector &position_label_ids) + { + sbfDataStore = &datastore; + relevantIndices = &relevant_indices; + distParams = &dist_params; + positionLabelIds = &position_label_ids; + + cachedNeighbors.clear(); + cachedNeighbors.resize(sbfDataStore->GetNumInsertedEntities()); + } + + //gets the nearest neighbors to the index and caches them + //this may expand k so that at least one non-zero distance is returned - if that is not possible then it will return all entities +#ifdef MULTITHREAD_SUPPORT + void PreCacheAllKnn(size_t top_k, bool run_concurrently) +#else + void PreCacheAllKnn(size_t top_k) +#endif + { + + #ifdef MULTITHREAD_SUPPORT + if(run_concurrently && relevantIndices->size() > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + std::vector> indices_completed; + indices_completed.reserve(relevantIndices->size()); + + for(auto index : *relevantIndices) + { + //fill in cache entry if it is not sufficient + if(top_k > cachedNeighbors[index].size()) + { + indices_completed.emplace_back( + Concurrency::threadPool.EnqueueBatchTask( + [this, index, top_k] + { + // could have knn cache constructor take in dist params and just get top_k from there, so don't need to pass it in everywhere + sbfDataStore->FindEntitiesNearestToIndexedEntity(distParams, *positionLabelIds, true, index, + top_k, *relevantIndices, true, cachedNeighbors[index]); + } + ) + ); + } + } + + enqueue_task_lock.Unlock(); + Concurrency::threadPool.CountCurrentThreadAsPaused(); + + for(auto &future : indices_completed) + future.wait(); + + Concurrency::threadPool.CountCurrentThreadAsResumed(); + + return; + } + } + //not running concurrently + #endif + + for(auto index : *relevantIndices) + { + //fill in cache entry if it is not sufficient + if(top_k > cachedNeighbors[index].size()) + { + cachedNeighbors[index].clear(); + sbfDataStore->FindEntitiesNearestToIndexedEntity(distParams, *positionLabelIds, false, index, top_k, *relevantIndices, true, cachedNeighbors[index]); + } + } + } + + //returns true if the cached entities nearest to index contain other_index within top_k + bool DoesCachedKnnContainEntity(size_t index, size_t other_index, size_t top_k) + { + for(size_t i = 0; i < top_k && i < cachedNeighbors[index].size(); i++) + { + if(cachedNeighbors[index][i].reference == other_index) + return true; + } + + return false; + } + + //gets the top_k nearest neighbor results of entities for the given index, excluding the additional_holdout_index, sets out to the results + //this may expand k so that at least one non-zero distance is returned - if that is not possible then it will return all entities + void GetKnn(size_t index, size_t top_k, std::vector> &out, + size_t additional_holdout_index = std::numeric_limits::max()) + { + for(auto &neighbor : cachedNeighbors[index]) + { + if(neighbor.reference == 
additional_holdout_index) + continue; + + out.push_back(neighbor); + + //done if have fulfilled top_k and the distance isn't 0 + if(out.size() >= top_k && neighbor.distance != 0.0) + return; + } + + //there were not enough results for this search, just do a new search + out.clear(); + sbfDataStore->FindEntitiesNearestToIndexedEntity(distParams, *positionLabelIds, false, index, top_k, *relevantIndices, true, out, additional_holdout_index); + } + + //like the other GetKnn, but only considers from_indices + void GetKnn(size_t index, size_t top_k, std::vector> &out, BitArrayIntegerSet &from_indices) + { + for(auto &neighbor : cachedNeighbors[index]) + { + if(!from_indices.contains(neighbor.reference)) + continue; + + out.push_back(neighbor); + + if(out.size() >= top_k && neighbor.distance != 0.0) + return; + } + + //there were not enough results for this search, just do a new search + out.clear(); + sbfDataStore->FindEntitiesNearestToIndexedEntity(distParams, *positionLabelIds, false, index, top_k, from_indices, true, out); + } + + //returns a pointer to the relevant indices of the cache + constexpr BitArrayIntegerSet *GetRelevantEntities() + { + return relevantIndices; + } + + //returns the number of relevant indices in the cache + inline size_t GetNumRelevantEntities() + { + return relevantIndices->size(); + } + +private: + //cache of nearest neighbor results. The index of cache is the entity, and the corresponding vector are its nearest neighbors. + std::vector>> cachedNeighbors; + + //pointer to datastore used to populate cache + SeparableBoxFilterDataStore *sbfDataStore; + + //distance parameters for the search + GeneralizedDistance *distParams; + + //position labels + std::vector *positionLabelIds; + + //pointer to the indices of relevant entities used to populate the cache + BitArrayIntegerSet *relevantIndices; +}; diff --git a/src/Amalgam/Merger.h b/src/Amalgam/Merger.h new file mode 100644 index 00000000..c0972d50 --- /dev/null +++ b/src/Amalgam/Merger.h @@ -0,0 +1,678 @@ +#pragma once + +//project headers: +#include "HashMaps.h" +#include "StringInternPool.h" + +//system headers: +#include +#include +#include +#include + +//Contains the data from evaluating the goodness or commonality of merging two or more things, +//but without the things merged +class MergeMetricResultsBase +{ +public: + //starts off with an exact match of nothing + constexpr MergeMetricResultsBase() + : commonality(0.0), mustMatch(false), exactMatch(false) + { } + + constexpr MergeMetricResultsBase(double _similarity, bool must_match = false, bool exact_match = true) + : commonality(_similarity), mustMatch(must_match), exactMatch(exact_match) + { } + + //adds the commonality and keeps track of whether it is an exact match + constexpr void AccumulateResults(const MergeMetricResultsBase &mmr) + { + commonality += mmr.commonality; + + if(exactMatch && !mmr.exactMatch) + exactMatch = false; + } + + //syntactic sugar for accumulating merge metric results + constexpr MergeMetricResultsBase &operator +=(const MergeMetricResultsBase &mmr) + { + AccumulateResults(mmr); + return *this; + } + + //returns true if this entity has more favorable matching results than mmr + // if require_nontrivial_match, then it requires at least one node or atomic value to be equal + constexpr bool IsBetterMatchThan(const MergeMetricResultsBase &mmr) + { + if(mmr.mustMatch) + return false; + if(mustMatch) + return true; + + //if same amount of commonality, prefer exact matches + if(commonality == mmr.commonality) + { + if(mmr.exactMatch && 
!exactMatch) + return false; + if(exactMatch && !mmr.exactMatch) + return true; + } + + return commonality > mmr.commonality; + } + + //syntactic sugar for comparing merge metric results + constexpr bool operator >(const MergeMetricResultsBase &mmr) + { + return IsBetterMatchThan(mmr); + } + + //returns true if the match is substantial enough that it has at least one equal value of its atoms + constexpr bool IsNontrivialMatch() + { + return exactMatch || mustMatch || commonality >= 1.0; + } + + //A value between indicating the commonality of the two sets of data being compared + double commonality; + + //if true, the data must be matched regardless of commonality (e.g., have the same label) + bool mustMatch; + + //if true, then the data were an exact match + bool exactMatch; +}; + +//Contains the data from evaluating the goodness or commonality of merging two or more things, +//with the things merged of type ElementType +template +class MergeMetricResults : public MergeMetricResultsBase +{ +public: + //starts off with an exact match of nothing + //note that if ElementType is a pointer or has a nondefault constructor, + //C++ initializes to 0 or nullptr + constexpr MergeMetricResults() + : MergeMetricResultsBase(), elementA(), elementB() + { } + + constexpr MergeMetricResults(double _similarity, ElementType a, ElementType b, bool must_match = false, bool exact_match = true) + : MergeMetricResultsBase(_similarity, must_match, exact_match), elementA(a), elementB(b) + { } + + //the two elements being compared + ElementType elementA; + ElementType elementB; +}; + +//implements a very simple 2d matrix of data using one vector +template +class FlatMatrix +{ +public: + void ClearAndResize(size_t size1, size_t size2) + { + firstDimensionSize = size1; + secondDimensionSize = size2; + flatMatrix.clear(); + flatMatrix.resize(size1 * size2); + } + + //returns the matrix value at pos1, pos2 + constexpr ElementType &At(size_t pos1, size_t pos2) + { + return flatMatrix[firstDimensionSize * pos2 + pos1]; + } + + size_t firstDimensionSize; + size_t secondDimensionSize; + std::vector flatMatrix; +}; + +//computes the commonality matrix for computing edit distances between vectors a and b +//the technique is similar to the WagnerFischer algorithm, except for commonality rather than distance +//commonality_function is used to return the commonality of two given elements of ElementType, +//and starting_index can be specified if some elements should be skipped before computing commonality +template +void ComputeSequenceCommonalityMatrix(FlatMatrix &sequence_commonality, + std::vector &a, std::vector &b, + CommonalityFunction commonality_function, size_t starting_index = 0) +{ + size_t a_size = a.size(); + size_t b_size = b.size(); + sequence_commonality.ClearAndResize(a_size + 1, b_size + 1); + + //start at second location so can compare to previous + starting_index++; + + //check all possible orders and accumulate, but skip first index + for(size_t i = starting_index; i <= a_size; i++) + { + for(size_t j = starting_index; j <= b_size; j++) + { + auto prev_with_new_match = sequence_commonality.At(i - 1, j - 1); + prev_with_new_match += commonality_function(a[i - 1], b[j - 1]); + + //assign sequence_commonality[i][j] the best of sequence_commonality[i][j - 1], sequence_commonality[i - 1][j]), + //or sequence_commonality[i - 1][j - 1] + commonality_function(a[i - 1], b[j - 1]) + if(sequence_commonality.At(i, j - 1) > sequence_commonality.At(i - 1, j)) + { + if(sequence_commonality.At(i, j - 1) > 
prev_with_new_match) + sequence_commonality.At(i, j) = sequence_commonality.At(i, j - 1); + else + sequence_commonality.At(i, j) = prev_with_new_match; + } + else + { + if(sequence_commonality.At(i - 1, j) > prev_with_new_match) + sequence_commonality.At(i, j) = sequence_commonality.At(i - 1, j); + else + sequence_commonality.At(i, j) = prev_with_new_match; + } + + } + } +} + +//Merges elements of type T +// AssocType should be some map where the variables are pointers to T +template> +class Merger +{ +public: + //Evaluates the commonality between values specified + virtual MergeMetricResults MergeMetric(T a, T b) = 0; + + //Yields a new value to put into the merged list that is being built + // if must_merge is true, then it must attempt to create something merging the entities, preferring + // the value that is more valid if applicable + virtual T MergeValues(T a, T b, bool must_merge = false) = 0; + + //Returns true if the merge should keep all elements that do not have a corresponding element to merge with + virtual bool KeepAllNonMergeableValues() = 0; + + //Returns true if the merge should keep some elements that do not have a corresponding element to merge with + //if KeepAllNonMergeableValues retursn true, then this should return true too + virtual bool KeepSomeNonMergeableValues() = 0; + + //Returns true if the merge should keep one of either particular element, a or b, that does not have a corresponding element + //may be stochastic + virtual bool KeepNonMergeableValue() = 0; + + //Returns true if the merge should keep element a instead of element b during for the merge + //assumes that KeepNonMergeableValue has returned true, because that means either a or b was selected, + //and this does the selecting + //may be stochastic + virtual bool KeepNonMergeableAInsteadOfB() = 0; + + //Returns true if the merge should keep the corresponding element during a merge + //may be stochastic + virtual bool KeepNonMergeableA() = 0; + virtual bool KeepNonMergeableB() = 0; + + //Returns true if the merge should attempt to merge two elements that are not necessarily matches + //may be stochastic + virtual bool AreMergeable(T a, T b) = 0; + + //Merges two unordered lists based on the specified MergeMethods + std::vector MergeUnorderedSets(std::vector &list_a, std::vector &list_b) + { + //return empty if nothing passed in + if(list_a.empty() && list_b.empty()) + return std::vector(); + + //copy over lists + std::vector a1(list_a); + std::vector a2(list_b); + + std::vector merged; + + std::vector unmatched_a1; + if(KeepAllNonMergeableValues()) + { + merged.reserve(std::max(a1.size(), a2.size())); + unmatched_a1.reserve(a1.size()); + } + + //for every element in a1, find best match (if one exists) in a2 + while(a1.size() > 0) + { + //look to see if there's a matching node + bool best_match_found = false; + size_t best_match_index = 0; + MergeMetricResults best_match_value; + for(size_t match_index = 0; match_index < a2.size(); match_index++) + { + auto match_value = MergeMetric(a1[0], a2[match_index]); + if(match_value.IsNontrivialMatch() + && (!best_match_found || match_value > best_match_value)) + { + best_match_found = true; + best_match_value = match_value; + best_match_index = match_index; + } + } + + //if found a match, merge the trees, then remove it from the match list and put it in the list + if(best_match_found) + { + T m = MergeValues(a1[0], a2[best_match_index]); + merged.emplace_back(m); + + a2.erase(begin(a2) + best_match_index); + } + else + { + //no match, so keep it for later if 
keeping all + if(KeepSomeNonMergeableValues()) + unmatched_a1.emplace_back(a1[0]); + } + + //remove from the first list + a1.erase(begin(a1)); + } + + //add on remainder if keeping all or some that weren't mergeable + if(KeepSomeNonMergeableValues()) + { + for(auto &n : unmatched_a1) + { + if(!KeepNonMergeableA()) + continue; + + T m = MergeValues(n, NullValue, true); + merged.emplace_back(m); + } + + for(auto &n : a2) + { + if(!KeepNonMergeableB()) + continue; + + T m = MergeValues(NullValue, n, true); + merged.emplace_back(m); + } + } + + return merged; + } + + //Merges two lists that are comprised of unordered sets of pairs based on the specified MergeMethods + std::vector MergeUnorderedSetsOfPairs(std::vector &list_a, std::vector &list_b) + { + //return empty if nothing passed in + if(list_a.empty() && list_b.empty()) + return std::vector(); + + //copy over lists + std::vector a1(list_a); + std::vector a2(list_b); + + std::vector merged; + + std::vector unmatched_a1; + if(KeepAllNonMergeableValues()) + { + merged.reserve(std::max(a1.size(), a2.size())); + unmatched_a1.reserve(a1.size()); + } + + //for every element in a1, find best match (if one exists) in a2 + while(a1.size() > 0) + { + //look to see if there's a matching node + bool best_match_found = false; + size_t best_match_index = 0; + MergeMetricResults best_match_value; + for(size_t match_index = 0; match_index < a2.size(); match_index += 2) + { + auto match_value = MergeMetric(a1[0], a2[match_index]); + if(match_value.IsNontrivialMatch() && match_value > best_match_value) + { + best_match_found = true; + best_match_value = match_value; + best_match_index = match_index; + } + } + + //if found a match, merge the trees, then remove it from the match list and put it in the list + if(best_match_found) + { + //merge the keys + T m_key = MergeValues(a1[0], a2[best_match_index]); + + //get both values if exist and remove key and value from second list (first list will be cleaned up at the end) + T m_value_1 = 0; + if(a1.size() > 1) + m_value_1 = a1[1]; + + T m_value_2 = 0; + if(a2.size() > best_match_index + 1) + { + m_value_2 = a2[best_match_index + 1]; + a2.erase(begin(a2) + best_match_index + 1); + } + if(a2.size() > best_match_index) + a2.erase(begin(a2) + best_match_index); + + //merge the values + T m_value = MergeValues(m_value_1, m_value_2); + + merged.emplace_back(m_key); + merged.emplace_back(m_value); + } + else + { + //no match, so keep it for later if keeping all + if(KeepSomeNonMergeableValues()) + { + unmatched_a1.emplace_back(a1[0]); + if(a1.size() > 1) + unmatched_a1.emplace_back(a1[1]); + } + } + + //remove the key-value pair from the first list + a1.erase(begin(a1)); + if(a1.size() > 0) + a1.erase(begin(a1)); + } + + //add on remainder if keeping all or some that weren't mergeable + if(KeepSomeNonMergeableValues()) + { + for(size_t i = 0; i < unmatched_a1.size(); i+= 2) + { + if(!KeepNonMergeableA()) + continue; + + T m = MergeValues(unmatched_a1[i], NullValue, true); + merged.emplace_back(m); + + if(i + 1 < unmatched_a1.size()) + merged.emplace_back(unmatched_a1[i + 1]); + else + merged.emplace_back(NullValue); + } + + for(size_t i = 0; i < a2.size(); i += 2) + { + if(!KeepNonMergeableB()) + continue; + + T m = MergeValues(a2[i], NullValue, true); + merged.emplace_back(m); + if(i + 1 < a2.size()) + merged.emplace_back(a2[i + 1]); + else + merged.emplace_back(NullValue); + } + } + + return merged; + } + + //Merges two ordered (sequence) lists based on the specified MergeMethods + std::vector 
MergeSequences(std::vector &list_a, std::vector &list_b) + { + //return empty if nothing passed in + if(list_a.empty() && list_b.empty()) + return std::vector(); + + //build sequence commonality matrix + FlatMatrix> sequence_commonality; + ComputeSequenceCommonalityMatrix(sequence_commonality, list_a, list_b, + [this] + (T a, T b) + { + return MergeMetric(a, b); + }); + + //build a new list, in reverse + std::vector merged; + if(KeepAllNonMergeableValues()) + merged.reserve(std::max(list_a.size(), list_b.size())); + + //start in the maximal position + auto a_index = list_a.size(); + auto b_index = list_b.size(); + + //iterate over everything, finding which was the maximal path + while(a_index > 0 && b_index > 0) + { + //if it's not a good match or worse than matching with the next one down in b, then take one from b + if(!sequence_commonality.At(a_index, b_index).IsNontrivialMatch() + || !sequence_commonality.At(a_index, b_index).IsBetterMatchThan(sequence_commonality.At(a_index, b_index - 1))) + { + b_index--; + if(KeepNonMergeableB()) + { + T m = MergeValues(NullValue, list_b[b_index], true); + merged.emplace_back(m); + } + continue; + } + + //if it's not better to merge with the next one down in a, then take a off + if(!sequence_commonality.At(a_index, b_index).IsBetterMatchThan(sequence_commonality.At(a_index - 1, b_index))) + { + a_index--; + if(KeepNonMergeableB()) + { + T m = MergeValues(list_a[a_index], NullValue, true); + merged.emplace_back(m); + } + continue; + } + + //must be that it's kept in both; if mergeable, merge, if not, take both if applicable + a_index--; + b_index--; + if(AreMergeable(list_a[a_index], list_b[b_index])) + { + T m = MergeValues(list_a[a_index], list_b[b_index]); + merged.emplace_back(m); + } + else + { + if(KeepNonMergeableA()) + merged.push_back(list_a[a_index]); + if(KeepNonMergeableB()) + merged.push_back(list_b[b_index]); + } + } + + //put any remaining elements of either array on if keeping all or some that weren't mergeable + if(KeepSomeNonMergeableValues()) + { + while(a_index > 0) + { + a_index--; + + if(!KeepNonMergeableA()) + continue; + + T m = MergeValues(list_a[a_index], NullValue, true); + merged.emplace_back(m); + } + + while(b_index > 0) + { + b_index--; + + if(!KeepNonMergeableB()) + continue; + + T m = MergeValues(NullValue, list_b[b_index], true); + merged.emplace_back(m); + } + } + + //put back in the right order + std::reverse(begin(merged), end(merged)); + return merged; + } + + //Merges two position-based ordered lists based on the specified MergeMethods + std::vector MergePositions(std::vector &list_a, std::vector &list_b) + { + //return empty if nothing passed in + if(list_a.empty() && list_b.empty()) + return std::vector(); + + //accumulate the array + std::vector merged; + if(KeepAllNonMergeableValues()) + merged.reserve(std::max(list_a.size(), list_b.size())); + + //use size of smallest list and merge all positions that are common + size_t smallest_list_size = std::min(list_a.size(), list_b.size()); + for(size_t i = 0; i < smallest_list_size; i++) + { + T generalized_child = MergeValues(list_a[i], list_b[i]); + merged.emplace_back(generalized_child); + } + + if(KeepSomeNonMergeableValues()) + { + //merge anything left in a + for(auto i = smallest_list_size; i < list_a.size(); i++) + { + if(KeepNonMergeableA()) + { + T generalized_child = MergeValues(list_a[i], NullValue, true); + merged.emplace_back(generalized_child); + } + else + { + merged.emplace_back(NullValue); + } + } + + //merge anything left in b + for(auto i = 
smallest_list_size; i < list_b.size(); i++) + { + if(KeepNonMergeableB()) + { + T generalized_child = MergeValues(NullValue, list_b[i], true); + merged.emplace_back(generalized_child); + } + else + { + merged.emplace_back(NullValue); + } + } + } + + return merged; + } + + //Merges two mappings based on the specified MergeMethods + AssocType MergeMaps(AssocType &map_a, AssocType &map_b) + { + AssocType merged; + + if(map_a.empty() && map_b.empty()) + return merged; + + //if not potentially keeping any that are common, + //can just do a quick pass finding those common in both + if(!KeepSomeNonMergeableValues()) + { + //see if both trees have mapped child nodes + if(map_a.size() > 0 && map_b.size() > 0) + { + //use keys from first node + for(auto &[n_key, n_value] : map_a) + { + //skip unless both trees have the key + auto found_b = map_b.find(n_key); + if(found_b == end(map_b)) + continue; + + //merge what's under the key + auto m_value = MergeValues(n_value, found_b->second); + merged[n_key] = m_value; + } + } + + return merged; + } + //else need to track some that might be in either or both + + //fast iteration if one element doesn't have any nodes + if(map_a.size() > 0 && map_b.empty()) + { + //merge all values with null + for(auto &[n_key, n_value] : map_a) + { + if(!KeepNonMergeableA()) + continue; + + auto m_value = MergeValues(n_value, NullValue, true); + merged[n_key] = m_value; + } + } + else if(map_a.empty() && map_b.size() > 0) + { + //merge all values with null + for(auto &[n_key, n_value] : map_b) + { + if(!KeepNonMergeableB()) + continue; + + auto m_value = MergeValues(NullValue, n_value, true); + merged[n_key] = m_value; + } + } + else if(map_a.size() > 0 && map_b.size() > 0) + { + //include all keys that are in both nodes + for(auto &[n_key, _] : map_a) + { + if(map_b.find(n_key) != end(map_b)) + merged.emplace(n_key, NullValue); + } + size_t num_common_indices = merged.size(); + + //keep those from a and b as appropriate + //but can skip if the merged is the same size as the map + if(map_a.size() != num_common_indices) + { + for(auto &[n_key, _] : map_a) + { + if(KeepNonMergeableA()) + merged.emplace(n_key, NullValue); + } + } + + if(map_b.size() != num_common_indices) + { + for(auto &[n_key, _] : map_b) + { + if(KeepNonMergeableB()) + merged.emplace(n_key, NullValue); + } + } + + for(auto &[m_key, m_value] : merged) + { + //merge what's under the key + auto found_a = map_a.find(m_key); + auto found_b = map_b.find(m_key); + + //if found both, merge both + if(found_a != end(map_a) && found_b != end(map_b)) + m_value = MergeValues(found_a->second, found_b->second); + else if(found_b == end(map_b)) + m_value = MergeValues(found_a->second, NullValue, true); + else //a not found + m_value = MergeValues(NullValue, found_b->second, true); + } + } + + return merged; + } +}; diff --git a/src/Amalgam/Opcodes.cpp b/src/Amalgam/Opcodes.cpp new file mode 100644 index 00000000..2d94cc4b --- /dev/null +++ b/src/Amalgam/Opcodes.cpp @@ -0,0 +1,371 @@ +//project headers: +#include "Opcodes.h" +#include "StringInternPool.h" + +void StringInternPool::InitializeStaticStrings() +{ + numStaticStrings = ENBISI_FIRST_DYNAMIC_STRING; + stringToID.reserve(numStaticStrings); + idToStringAndRefCount.resize(numStaticStrings); + + EmplaceStaticString(ENBISI_NOT_A_STRING, ".nas"); + EmplaceStaticString(ENBISI_EMPTY_STRING, ""); + + + //opcodes + + //built-in / system specific + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SYSTEM), "system"); + 
EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_DEFAULTS), "get_defaults"); + + //parsing + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_PARSE), "parse"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_UNPARSE), "unparse"); + + //core control + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_IF), "if"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SEQUENCE), "seq"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_PARALLEL), "parallel"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LAMBDA), "lambda"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONCLUDE), "conclude"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CALL), "call"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CALL_SANDBOXED), "call_sandboxed"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_WHILE), "while"); + + //definitions + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LET), "let"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DECLARE), "declare"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASSIGN), "assign"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ACCUM), "accum"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_RETRIEVE), "retrieve"); + + //retrieval + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET), "get"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET), "set"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_REPLACE), "replace"); + + //stack and node manipulation + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TARGET), "target"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TARGET_INDEX), "target_index"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TARGET_VALUE), "target_value"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_STACK), "stack"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ARGS), "args"); + + //simulation and operations + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_RAND), "rand"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_WEIGHTED_RAND), "weighted_rand"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_RAND_SEED), "get_rand_seed"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_RAND_SEED), "set_rand_seed"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SYSTEM_TIME), "system_time"); + + //base math + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ADD), "+"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SUBTRACT), "-"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MULTIPLY), "*"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DIVIDE), "/"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MODULUS), "mod"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_DIGITS), "get_digits"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_DIGITS), "set_digits"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FLOOR), "floor"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CEILING), "ceil"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ROUND), "round"); + + //extended math + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_EXPONENT), "exp"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LOG), "log"); + + 
EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SIN), "sin"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASIN), "asin"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COS), "cos"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ACOS), "acos"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TAN), "tan"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ATAN), "atan"); + + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SINH), "sinh"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASINH), "asinh"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COSH), "cosh"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ACOSH), "acosh"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TANH), "tanh"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ATANH), "atanh"); + + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ERF), "erf"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TGAMMA), "tgamma"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LGAMMA), "lgamma"); + + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SQRT), "sqrt"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_POW), "pow"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ABS), "abs"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MAX), "max"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MIN), "min"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GENERALIZED_DISTANCE), "generalized_distance"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DOT_PRODUCT), "dot_product"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ENTROPY), "entropy"); + + //list manipulation + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FIRST), "first"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TAIL), "tail"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LAST), "last"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TRUNC), "trunc"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_APPEND), "append"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SIZE), "size"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_RANGE), "range"); + + //transformation + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_REWRITE), "rewrite"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MAP), "map"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FILTER), "filter"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_WEAVE), "weave"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_REDUCE), "reduce"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_APPLY), "apply"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_REVERSE), "reverse"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SORT), "sort"); + + //associative list manipulation + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_INDICES), "indices"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_VALUES), "values"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONTAINS_INDEX), "contains_index"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONTAINS_VALUE), "contains_value"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_REMOVE), "remove"); + 
EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_KEEP), "keep"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASSOCIATE), "associate"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ZIP), "zip"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_UNZIP), "unzip"); + + //logic + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_AND), "and"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_OR), "or"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_XOR), "xor"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_NOT), "not"); + + //equivalence + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_EQUAL), "="); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_NEQUAL), "!="); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LESS), "<"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LEQUAL), "<="); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GREATER), ">"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GEQUAL), ">="); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TYPE_EQUALS), "~"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TYPE_NEQUALS), "!~"); + + //built-in constants and variables + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TRUE), "true"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FALSE), "false"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_NULL), "null"); + + //data types + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LIST), "list"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASSOC), "assoc"); + + //immediates - no associated keywords + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_NUMBER), "number"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_STRING), "string"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SYMBOL), "symbol"); + + //node types + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_TYPE), "get_type"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_TYPE_STRING), "get_type_string"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_TYPE), "set_type"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FORMAT), "format"); + + //labels and comments + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_LABELS), "get_labels"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_ALL_LABELS), "get_all_labels"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_LABELS), "set_labels"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ZIP_LABELS), "zip_labels"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_COMMENTS), "get_comments"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_COMMENTS), "set_comments"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_CONCURRENCY), "get_concurrency"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_CONCURRENCY), "set_concurrency"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_VALUE), "get_value"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_VALUE), "set_value"); + + //string + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_EXPLODE), "explode"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SPLIT), "split"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SUBSTR), "substr"); + 
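+	//note: string values are stored in nodes unescaped; Parser::ParseString strips the backslash
+	// escape sequences (\\, \0, \", \t, \n, \r) when code is parsed and Parser::Backslashify
+	// re-applies them when code is unparsed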
EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONCAT), "concat"); + + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CRYPTO_SIGN), "crypto_sign"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CRYPTO_SIGN_VERIFY), "crypto_sign_verify"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ENCRYPT), "encrypt"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DECRYPT), "decrypt"); + + //I/O + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_PRINT), "print"); + + //tree merging + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TOTAL_SIZE), "total_size"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMMONALITY), "commonality"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_EDIT_DISTANCE), "edit_distance"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MUTATE), "mutate"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_INTERSECT), "intersect"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_UNION), "union"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DIFFERENCE), "difference"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MIX), "mix"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MIX_LABELS), "mix_labels"); + + //entity merging + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_TOTAL_ENTITY_SIZE), "total_entity_size"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_FLATTEN_ENTITY), "flatten_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMMONALITY_ENTITIES), "commonality_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_EDIT_DISTANCE_ENTITIES), "edit_distance_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MUTATE_ENTITY), "mutate_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_INTERSECT_ENTITIES), "intersect_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_UNION_ENTITIES), "union_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DIFFERENCE_ENTITIES), "difference_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MIX_ENTITIES), "mix_entities"); + + //entity details + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_ENTITY_COMMENTS), "get_entity_comments"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_RETRIEVE_ENTITY_ROOT), "retrieve_entity_root"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASSIGN_ENTITY_ROOTS), "assign_entity_roots"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ACCUM_ENTITY_ROOTS), "accum_entity_roots"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_ENTITY_RAND_SEED), "get_entity_rand_seed"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_ENTITY_RAND_SEED), "set_entity_rand_seed"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_GET_ENTITY_ROOT_PERMISSION), "get_entity_root_permission"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_SET_ENTITY_ROOT_PERMISSION), "set_entity_root_permission"); + + //entity base actions + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CREATE_ENTITIES), "create_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CLONE_ENTITIES), "clone_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_MOVE_ENTITIES), "move_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DESTROY_ENTITIES), 
"destroy_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LOAD), "load"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LOAD_ENTITY), "load_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_LOAD_PERSISTENT_ENTITY), "load_persistent_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_STORE), "store"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_STORE_ENTITY), "store_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONTAINS_ENTITY), "contains_entity"); + + //entity query + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONTAINED_ENTITIES), "contained_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMPUTE_ON_CONTAINED_ENTITIES), "compute_on_contained_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_COUNT), "query_count"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_SELECT), "query_select"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_SAMPLE), "query_sample"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_WEIGHTED_SAMPLE), "query_weighted_sample"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_IN_ENTITY_LIST), "query_in_entity_list"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NOT_IN_ENTITY_LIST), "query_not_in_entity_list"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_EXISTS), "query_exists"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NOT_EXISTS), "query_not_exists"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_EQUALS), "query_equals"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NOT_EQUALS), "query_not_equals"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_BETWEEN), "query_between"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NOT_BETWEEN), "query_not_between"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_AMONG), "query_among"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NOT_AMONG), "query_not_among"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_MAX), "query_max"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_MIN), "query_min"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_SUM), "query_sum"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_MODE), "query_mode"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_QUANTILE), "query_quantile"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_GENERALIZED_MEAN), "query_generalized_mean"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_MIN_DIFFERENCE), "query_min_difference"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_MAX_DIFFERENCE), "query_max_difference"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_VALUE_MASSES), "query_value_masses"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_LESS_OR_EQUAL_TO), "query_less_or_equal_to"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_GREATER_OR_EQUAL_TO), "query_greater_or_equal_to"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_WITHIN_GENERALIZED_DISTANCE), "query_within_generalized_distance"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_QUERY_NEAREST_GENERALIZED_DISTANCE), 
"query_nearest_generalized_distance"); + + //compute queries + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMPUTE_ENTITY_CONVICTIONS), "compute_entity_convictions"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE), "compute_entity_group_kl_divergence"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS), "compute_entity_distance_contributions"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_COMPUTE_ENTITY_KL_DIVERGENCES), "compute_entity_kl_divergences"); + + //entity access + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CONTAINS_LABEL), "contains_label"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ASSIGN_TO_ENTITIES), "assign_to_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DIRECT_ASSIGN_TO_ENTITIES), "direct_assign_to_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_ACCUM_TO_ENTITIES), "accum_to_entities"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_RETRIEVE_FROM_ENTITY), "retrieve_from_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_DIRECT_RETRIEVE_FROM_ENTITY), "direct_retrieve_from_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CALL_ENTITY), "call_entity"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CALL_ENTITY_GET_CHANGES), "call_entity_get_changes"); + EmplaceStaticString(GetStringIdFromNodeTypeFromString(ENT_CALL_CONTAINER), "call_container"); + + //end opcodes + + //built-in common values + EmplaceStaticString(ENBISI_nan, ".nan"); + EmplaceStaticString(ENBISI_infinity, ".infinity"); + EmplaceStaticString(ENBISI_neg_infinity, "-.infinity"); + EmplaceStaticString(ENBISI_zero, "0"); + EmplaceStaticString(ENBISI_one, "1"); + EmplaceStaticString(ENBISI_neg_one, "-1"); + EmplaceStaticString(ENBISI_empty_null, "(null)"); + EmplaceStaticString(ENBISI_empty_list, "(list)"); + EmplaceStaticString(ENBISI_empty_assoc, "(assoc)"); + EmplaceStaticString(ENBISI_empty_true, "(true)"); + EmplaceStaticString(ENBISI_empty_false, "(false)"); + + //config file parameters + EmplaceStaticString(ENBISI_rand_seed, "rand_seed"); + + //substr parameters + EmplaceStaticString(ENBISI_all, "all"); + EmplaceStaticString(ENBISI_submatches, "submatches"); + + //dynamically generated function parameters + EmplaceStaticString(ENBISI__, "_"); + EmplaceStaticString(ENBISI_new_entity, "new_entity"); + + //entity access parameters + EmplaceStaticString(ENBISI_accessing_entity, "accessing_entity"); + + //distance types + EmplaceStaticString(ENBISI_nominal, "nominal"); + EmplaceStaticString(ENBISI_continuous, "continuous"); + EmplaceStaticString(ENBISI_cyclic, "cyclic"); + //string already an opcode + EmplaceStaticString(ENBISI_code, "code"); + + //distance parameter values + EmplaceStaticString(ENBISI_surprisal_to_prob, "surprisal_to_prob"); + + //numerical precision types + EmplaceStaticString(ENBISI_precise, "precise"); + EmplaceStaticString(ENBISI_fast, "fast"); + EmplaceStaticString(ENBISI_recompute_precise, "recompute_precise"); + + //format opcode types + EmplaceStaticString(ENBISI_Base16, "Base16"); + EmplaceStaticString(ENBISI_Base64, "Base64"); + EmplaceStaticString(ENBISI_int8, "int8"); + EmplaceStaticString(ENBISI_uint8, "uint8"); + EmplaceStaticString(ENBISI_int16, "int16"); + EmplaceStaticString(ENBISI_uint16, "uint16"); + EmplaceStaticString(ENBISI_int32, "int32"); + EmplaceStaticString(ENBISI_uint32, "uint32"); + 
EmplaceStaticString(ENBISI_int64, "int64"); + EmplaceStaticString(ENBISI_uint64, "uint64"); + EmplaceStaticString(ENBISI_float, "float"); + EmplaceStaticString(ENBISI_double, "double"); + EmplaceStaticString(ENBISI_INT8, "INT8"); + EmplaceStaticString(ENBISI_UINT8, "UINT8"); + EmplaceStaticString(ENBISI_INT16, "INT16"); + EmplaceStaticString(ENBISI_UINT16, "UINT16"); + EmplaceStaticString(ENBISI_INT32, "INT32"); + EmplaceStaticString(ENBISI_UINT32, "UINT32"); + EmplaceStaticString(ENBISI_INT64, "INT64"); + EmplaceStaticString(ENBISI_UINT64, "UINT64"); + EmplaceStaticString(ENBISI_FLOAT, "FLOAT"); + EmplaceStaticString(ENBISI_DOUBLE, "DOUBLE"); + EmplaceStaticString(ENBISI_json, "json"); + EmplaceStaticString(ENBISI_yaml, "yaml"); + + //formapt opcode params + EmplaceStaticString(ENBISI_sort_keys, "sort_keys"); + EmplaceStaticString(ENBISI_locale, "locale"); + EmplaceStaticString(ENBISI_timezone, "timezone"); + + //mutate opcode mutation types + EmplaceStaticString(ENBISI_change_type, "change_type"); + EmplaceStaticString(ENBISI_delete, "delete"); + EmplaceStaticString(ENBISI_insert, "insert"); + EmplaceStaticString(ENBISI_swap_elements, "swap_elements"); + EmplaceStaticString(ENBISI_deep_copy_elements, "deep_copy_elements"); + EmplaceStaticString(ENBISI_delete_elements, "delete_elements"); + EmplaceStaticString(ENBISI_change_label, "change_label"); +} diff --git a/src/Amalgam/Opcodes.h b/src/Amalgam/Opcodes.h new file mode 100644 index 00000000..2cb4a29e --- /dev/null +++ b/src/Amalgam/Opcodes.h @@ -0,0 +1,641 @@ +#pragma once + +//project headers: +#include "StringInternPool.h" + +//opcodes / commands / operations in Amalgam +enum EvaluableNodeType : uint8_t +{ + //built-in / system specific + ENT_SYSTEM, + ENT_GET_DEFAULTS, + + //parsing + ENT_PARSE, + ENT_UNPARSE, + + //core control + ENT_IF, + ENT_SEQUENCE, + ENT_PARALLEL, + ENT_LAMBDA, + ENT_CONCLUDE, + ENT_CALL, + ENT_CALL_SANDBOXED, + ENT_WHILE, + + //definitions + ENT_LET, + ENT_DECLARE, + ENT_ASSIGN, + ENT_ACCUM, + + //retrieval + ENT_RETRIEVE, + ENT_GET, + ENT_SET, + ENT_REPLACE, + + //stack and node manipulation + ENT_TARGET, + ENT_TARGET_INDEX, + ENT_TARGET_VALUE, + ENT_STACK, + ENT_ARGS, + + //simulation and operations + ENT_RAND, + ENT_WEIGHTED_RAND, + ENT_GET_RAND_SEED, + ENT_SET_RAND_SEED, + ENT_SYSTEM_TIME, + + //base math + ENT_ADD, + ENT_SUBTRACT, + ENT_MULTIPLY, + ENT_DIVIDE, + ENT_MODULUS, + ENT_GET_DIGITS, + ENT_SET_DIGITS, + ENT_FLOOR, + ENT_CEILING, + ENT_ROUND, + + //extended math + ENT_EXPONENT, + ENT_LOG, + + ENT_SIN, + ENT_ASIN, + ENT_COS, + ENT_ACOS, + ENT_TAN, + ENT_ATAN, + + ENT_SINH, + ENT_ASINH, + ENT_COSH, + ENT_ACOSH, + ENT_TANH, + ENT_ATANH, + + ENT_ERF, + ENT_TGAMMA, + ENT_LGAMMA, + + ENT_SQRT, + ENT_POW, + ENT_ABS, + ENT_MAX, + ENT_MIN, + ENT_DOT_PRODUCT, + ENT_GENERALIZED_DISTANCE, + ENT_ENTROPY, + + //list manipulation + ENT_FIRST, + ENT_TAIL, + ENT_LAST, + ENT_TRUNC, + ENT_APPEND, + ENT_SIZE, + ENT_RANGE, + + //transformation + ENT_REWRITE, + ENT_MAP, + ENT_FILTER, + ENT_WEAVE, + ENT_REDUCE, + ENT_APPLY, + ENT_REVERSE, + ENT_SORT, + + //associative list manipulation + ENT_INDICES, + ENT_VALUES, + ENT_CONTAINS_INDEX, + ENT_CONTAINS_VALUE, + ENT_REMOVE, + ENT_KEEP, + ENT_ASSOCIATE, + ENT_ZIP, + ENT_UNZIP, + + //logic + ENT_AND, + ENT_OR, + ENT_XOR, + ENT_NOT, + + //equivalence + ENT_EQUAL, + ENT_NEQUAL, + ENT_LESS, + ENT_LEQUAL, + ENT_GREATER, + ENT_GEQUAL, + ENT_TYPE_EQUALS, + ENT_TYPE_NEQUALS, + + //built-in constants and variables + ENT_TRUE, + ENT_FALSE, + ENT_NULL, + + //data types + ENT_LIST, + 
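+	//an ENT_ASSOC node stores its child nodes as a key-to-value mapping rather than an ordered
+	// list (see DoesEvaluableNodeTypeUseAssocData below)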
ENT_ASSOC, + ENT_NUMBER, + ENT_STRING, + ENT_SYMBOL, + + //node types + ENT_GET_TYPE, + ENT_GET_TYPE_STRING, + ENT_SET_TYPE, + ENT_FORMAT, + + //EvaluableNode management: labels, comments, and concurrency + ENT_GET_LABELS, + ENT_GET_ALL_LABELS, + ENT_SET_LABELS, + ENT_ZIP_LABELS, + + ENT_GET_COMMENTS, + ENT_SET_COMMENTS, + + ENT_GET_CONCURRENCY, + ENT_SET_CONCURRENCY, + + ENT_GET_VALUE, + ENT_SET_VALUE, + + //string + ENT_EXPLODE, + ENT_SPLIT, + ENT_SUBSTR, + ENT_CONCAT, + + //encryption + ENT_CRYPTO_SIGN, + ENT_CRYPTO_SIGN_VERIFY, + ENT_ENCRYPT, + ENT_DECRYPT, + + //I/O + ENT_PRINT, + + //tree merging + ENT_TOTAL_SIZE, + ENT_MUTATE, + ENT_COMMONALITY, + ENT_EDIT_DISTANCE, + ENT_INTERSECT, + ENT_UNION, + ENT_DIFFERENCE, + ENT_MIX, + ENT_MIX_LABELS, + + //entity merging + ENT_TOTAL_ENTITY_SIZE, + ENT_FLATTEN_ENTITY, + ENT_MUTATE_ENTITY, + ENT_COMMONALITY_ENTITIES, + ENT_EDIT_DISTANCE_ENTITIES, + ENT_INTERSECT_ENTITIES, + ENT_UNION_ENTITIES, + ENT_DIFFERENCE_ENTITIES, + ENT_MIX_ENTITIES, + + //entity details + ENT_GET_ENTITY_COMMENTS, + ENT_RETRIEVE_ENTITY_ROOT, + ENT_ASSIGN_ENTITY_ROOTS, + ENT_ACCUM_ENTITY_ROOTS, + ENT_GET_ENTITY_RAND_SEED, + ENT_SET_ENTITY_RAND_SEED, + ENT_GET_ENTITY_ROOT_PERMISSION, + ENT_SET_ENTITY_ROOT_PERMISSION, + + //entity base actions + ENT_CREATE_ENTITIES, + ENT_CLONE_ENTITIES, + ENT_MOVE_ENTITIES, + ENT_DESTROY_ENTITIES, + ENT_LOAD, + ENT_LOAD_ENTITY, + ENT_LOAD_PERSISTENT_ENTITY, + ENT_STORE, + ENT_STORE_ENTITY, + ENT_CONTAINS_ENTITY, + + //entity query + ENT_CONTAINED_ENTITIES, + ENT_COMPUTE_ON_CONTAINED_ENTITIES, + ENT_QUERY_SELECT, + ENT_QUERY_SAMPLE, + ENT_QUERY_WEIGHTED_SAMPLE, + ENT_QUERY_IN_ENTITY_LIST, + ENT_QUERY_NOT_IN_ENTITY_LIST, + ENT_QUERY_COUNT, + ENT_QUERY_EXISTS, + ENT_QUERY_NOT_EXISTS, + ENT_QUERY_EQUALS, + ENT_QUERY_NOT_EQUALS, + ENT_QUERY_BETWEEN, + ENT_QUERY_NOT_BETWEEN, + ENT_QUERY_AMONG, + ENT_QUERY_NOT_AMONG, + ENT_QUERY_MAX, + ENT_QUERY_MIN, + ENT_QUERY_SUM, + ENT_QUERY_MODE, + ENT_QUERY_QUANTILE, + ENT_QUERY_GENERALIZED_MEAN, + ENT_QUERY_MIN_DIFFERENCE, + ENT_QUERY_MAX_DIFFERENCE, + ENT_QUERY_VALUE_MASSES, + ENT_QUERY_GREATER_OR_EQUAL_TO, + ENT_QUERY_LESS_OR_EQUAL_TO, + ENT_QUERY_WITHIN_GENERALIZED_DISTANCE, + ENT_QUERY_NEAREST_GENERALIZED_DISTANCE, + + //aggregate analysis entity query + ENT_COMPUTE_ENTITY_CONVICTIONS, + ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE, + ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS, + ENT_COMPUTE_ENTITY_KL_DIVERGENCES, + + //entity access + ENT_CONTAINS_LABEL, + ENT_ASSIGN_TO_ENTITIES, + ENT_DIRECT_ASSIGN_TO_ENTITIES, + ENT_ACCUM_TO_ENTITIES, + ENT_RETRIEVE_FROM_ENTITY, + ENT_DIRECT_RETRIEVE_FROM_ENTITY, + ENT_CALL_ENTITY, + ENT_CALL_ENTITY_GET_CHANGES, + ENT_CALL_CONTAINER, + + //not in active memory + //freed and no longer in use + ENT_DEALLOCATED, + //allocated, but not in use yet + ENT_UNINITIALIZED, + + //something went wrong - maximum value + ENT_NOT_A_BUILT_IN_TYPE, +}; + +//total number of opcodes +constexpr size_t NUM_ENT_OPCODES = ENT_NOT_A_BUILT_IN_TYPE; +//total number of valid opcodes +constexpr size_t NUM_VALID_ENT_OPCODES = ENT_DEALLOCATED; + + +//Different arrangements of ordered parameters +enum OrderedChildNodeType +{ + OCNT_UNORDERED, + OCNT_ORDERED, + OCNT_ONE_POSITION_THEN_ORDERED, + OCNT_PAIRED, + OCNT_ONE_POSITION_THEN_PAIRED, + OCNT_POSITION +}; + +//Returns the type of structure that the ordered child nodes have for a given type +constexpr OrderedChildNodeType GetInstructionOrderedChildNodeType(EvaluableNodeType t) +{ + switch(t) + { + case ENT_PARALLEL: + case ENT_ADD: + case 
ENT_MULTIPLY: + case ENT_MAX: case ENT_MIN: + case ENT_AND: case ENT_OR: case ENT_XOR: + case ENT_EQUAL: case ENT_NEQUAL: + case ENT_NULL: + case ENT_DESTROY_ENTITIES: + return OCNT_UNORDERED; + + case ENT_SYSTEM: + case ENT_GET_DEFAULTS: + case ENT_SEQUENCE: + case ENT_APPEND: case ENT_FILTER: case ENT_SORT: + case ENT_ZIP: case ENT_UNZIP: + case ENT_LESS: case ENT_LEQUAL: + case ENT_GREATER: case ENT_GEQUAL: case ENT_TYPE_EQUALS: case ENT_TYPE_NEQUALS: + case ENT_TRUE: case ENT_FALSE: + case ENT_LIST: + case ENT_CONCAT: + case ENT_PRINT: + case ENT_ASSIGN_ENTITY_ROOTS: case ENT_ACCUM_ENTITY_ROOTS: + case ENT_SET_ENTITY_RAND_SEED: + case ENT_CREATE_ENTITIES: + case ENT_CONTAINED_ENTITIES: case ENT_COMPUTE_ON_CONTAINED_ENTITIES: + case ENT_QUERY_SELECT: case ENT_QUERY_SAMPLE: case ENT_QUERY_WEIGHTED_SAMPLE: + case ENT_QUERY_IN_ENTITY_LIST: case ENT_QUERY_NOT_IN_ENTITY_LIST: case ENT_QUERY_COUNT: + case ENT_QUERY_EXISTS: case ENT_QUERY_NOT_EXISTS: + case ENT_QUERY_EQUALS: case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_BETWEEN: case ENT_QUERY_NOT_BETWEEN: + case ENT_QUERY_AMONG: case ENT_QUERY_NOT_AMONG: + case ENT_QUERY_MAX: case ENT_QUERY_MIN: + case ENT_QUERY_SUM: case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: case ENT_QUERY_MAX_DIFFERENCE: + case ENT_QUERY_VALUE_MASSES: + case ENT_QUERY_GREATER_OR_EQUAL_TO: case ENT_QUERY_LESS_OR_EQUAL_TO: + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + case ENT_COMPUTE_ENTITY_CONVICTIONS: case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + case ENT_CONTAINS_LABEL: case ENT_ASSIGN_TO_ENTITIES: case ENT_DIRECT_ASSIGN_TO_ENTITIES: + case ENT_ACCUM_TO_ENTITIES: case ENT_RETRIEVE_FROM_ENTITY: case ENT_DIRECT_RETRIEVE_FROM_ENTITY: + case ENT_CALL_ENTITY: case ENT_CALL_ENTITY_GET_CHANGES: case ENT_CALL_CONTAINER: + return OCNT_ORDERED; + + case ENT_WHILE: case ENT_LET: case ENT_DECLARE: case ENT_SUBTRACT: + case ENT_DIVIDE: case ENT_MODULUS: + return OCNT_ONE_POSITION_THEN_ORDERED; + + case ENT_ASSOC: case ENT_ASSOCIATE: + return OCNT_PAIRED; + + case ENT_ASSIGN: case ENT_ACCUM: + case ENT_SET: case ENT_REPLACE: + return OCNT_ONE_POSITION_THEN_PAIRED; + + case ENT_PARSE: case ENT_UNPARSE: case ENT_IF: case ENT_LAMBDA: + case ENT_CONCLUDE: + case ENT_CALL: case ENT_CALL_SANDBOXED: + case ENT_RETRIEVE: + case ENT_GET: + case ENT_TARGET: case ENT_TARGET_INDEX: case ENT_TARGET_VALUE: + case ENT_STACK: case ENT_ARGS: + case ENT_RAND: case ENT_WEIGHTED_RAND: case ENT_GET_RAND_SEED: case ENT_SET_RAND_SEED: + case ENT_SYSTEM_TIME: + case ENT_GET_DIGITS: case ENT_SET_DIGITS: + case ENT_FLOOR: case ENT_CEILING: case ENT_ROUND: + case ENT_SIN: case ENT_ASIN: case ENT_COS: case ENT_ACOS: + case ENT_EXPONENT: case ENT_LOG: case ENT_TAN: + case ENT_ATAN: + case ENT_SINH: case ENT_ASINH: case ENT_COSH: case ENT_ACOSH: + case ENT_TANH: case ENT_ATANH: + case ENT_ERF: case ENT_TGAMMA: case ENT_LGAMMA: + case ENT_SQRT: case ENT_POW: case ENT_ABS: + case ENT_DOT_PRODUCT: case ENT_GENERALIZED_DISTANCE: case ENT_ENTROPY: + case ENT_FIRST: case ENT_TAIL: case ENT_LAST: case ENT_TRUNC: + case ENT_SIZE: case ENT_RANGE: + case ENT_REWRITE: case ENT_MAP: case ENT_WEAVE: + case ENT_REDUCE: case ENT_APPLY: case ENT_REVERSE: + case ENT_INDICES: + case ENT_VALUES: case ENT_CONTAINS_INDEX: case ENT_CONTAINS_VALUE: + case ENT_REMOVE: case ENT_KEEP: + case ENT_NOT: + case ENT_NUMBER: case ENT_STRING: + 
case ENT_SYMBOL: + case ENT_GET_TYPE: case ENT_GET_TYPE_STRING: case ENT_SET_TYPE: case ENT_FORMAT: + case ENT_GET_LABELS: case ENT_GET_ALL_LABELS: case ENT_SET_LABELS: case ENT_ZIP_LABELS: + case ENT_GET_COMMENTS: case ENT_SET_COMMENTS: + case ENT_GET_CONCURRENCY: case ENT_SET_CONCURRENCY: + case ENT_GET_VALUE: case ENT_SET_VALUE: + case ENT_EXPLODE: case ENT_SPLIT: case ENT_SUBSTR: + case ENT_CRYPTO_SIGN: case ENT_CRYPTO_SIGN_VERIFY: + case ENT_ENCRYPT: case ENT_DECRYPT: + case ENT_TOTAL_SIZE: case ENT_COMMONALITY: case ENT_EDIT_DISTANCE: case ENT_MUTATE: + case ENT_INTERSECT: case ENT_UNION: case ENT_DIFFERENCE: + case ENT_MIX: case ENT_MIX_LABELS: + case ENT_TOTAL_ENTITY_SIZE: case ENT_FLATTEN_ENTITY: case ENT_MUTATE_ENTITY: + case ENT_COMMONALITY_ENTITIES: + case ENT_INTERSECT_ENTITIES: case ENT_UNION_ENTITIES: case ENT_DIFFERENCE_ENTITIES: + case ENT_MIX_ENTITIES: + case ENT_GET_ENTITY_COMMENTS: + case ENT_RETRIEVE_ENTITY_ROOT: + case ENT_GET_ENTITY_RAND_SEED: + case ENT_GET_ENTITY_ROOT_PERMISSION: case ENT_SET_ENTITY_ROOT_PERMISSION: + case ENT_CLONE_ENTITIES: case ENT_MOVE_ENTITIES: + case ENT_LOAD: case ENT_LOAD_ENTITY: case ENT_LOAD_PERSISTENT_ENTITY: + case ENT_STORE_ENTITY: case ENT_STORE: + case ENT_CONTAINS_ENTITY: + return OCNT_POSITION; + + default: + return OCNT_POSITION; + } +} + +//Returns true if the instruction uses an associative array as parameters. If false, then a regular kind of list +constexpr bool DoesInstructionUseAssocParameters(EvaluableNodeType t) +{ + return GetInstructionOrderedChildNodeType(t) == OCNT_PAIRED; +} + +//Returns true if the type is an immediate value +constexpr bool IsEvaluableNodeTypeImmediate(EvaluableNodeType t) +{ + return (t == ENT_NUMBER || t == ENT_STRING || t == ENT_SYMBOL); +} + +//Returns true if the type uses string data +constexpr bool DoesEvaluableNodeTypeUseStringData(EvaluableNodeType t) +{ + return (t == ENT_STRING || t == ENT_SYMBOL); +} + +//Returns true if the type uses number data +constexpr bool DoesEvaluableNodeTypeUseNumberData(EvaluableNodeType t) +{ + return (t == ENT_NUMBER); +} + +//Returns true if the type uses association data +constexpr bool DoesEvaluableNodeTypeUseAssocData(EvaluableNodeType t) +{ + return (t == ENT_ASSOC); +} + +//Returns true if the type uses ordered data (doesn't use any other type) +constexpr bool DoesEvaluableNodeTypeUseOrderedData(EvaluableNodeType t) +{ + return (!IsEvaluableNodeTypeImmediate(t) && !DoesEvaluableNodeTypeUseAssocData(t)); +} + +//returns true if the type is a query +constexpr bool IsEvaluableNodeTypeQuery(EvaluableNodeType type) +{ + return (type == ENT_QUERY_SELECT || type == ENT_QUERY_IN_ENTITY_LIST || type == ENT_QUERY_NOT_IN_ENTITY_LIST || type == ENT_QUERY_COUNT + || type == ENT_QUERY_SAMPLE || type == ENT_QUERY_WEIGHTED_SAMPLE || type == ENT_QUERY_EXISTS || type == ENT_QUERY_NOT_EXISTS + || type == ENT_QUERY_EQUALS || type == ENT_QUERY_NOT_EQUALS + || type == ENT_QUERY_BETWEEN || type == ENT_QUERY_NOT_BETWEEN || type == ENT_QUERY_AMONG || type == ENT_QUERY_NOT_AMONG + || type == ENT_QUERY_MAX || type == ENT_QUERY_MIN || type == ENT_QUERY_SUM || type == ENT_QUERY_MODE + || type == ENT_QUERY_QUANTILE || type == ENT_QUERY_GENERALIZED_MEAN + || type == ENT_QUERY_MIN_DIFFERENCE || type == ENT_QUERY_MAX_DIFFERENCE || type == ENT_QUERY_VALUE_MASSES + || type == ENT_QUERY_LESS_OR_EQUAL_TO || type == ENT_QUERY_GREATER_OR_EQUAL_TO + || type == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE || type == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE + || type == ENT_COMPUTE_ENTITY_CONVICTIONS || 
type == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE + || type == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS || type == ENT_COMPUTE_ENTITY_KL_DIVERGENCES + ); +} + +//returns true if the type could potentially be idempotent +constexpr bool IsEvaluableNodeTypePotentiallyIdempotent(EvaluableNodeType type) +{ + return (type == ENT_NUMBER || type == ENT_STRING + || type == ENT_TRUE || type == ENT_FALSE + || type == ENT_NULL || type == ENT_LIST || type == ENT_ASSOC + || IsEvaluableNodeTypeQuery(type)); +} + +constexpr bool IsEvaluableNodeTypeValid(EvaluableNodeType t) +{ + return (t < NUM_VALID_ENT_OPCODES); +} + +//covers ENBISI_NOT_A_STRING and ENBISI_EMPTY_STRING +constexpr size_t NUM_ENBISI_SPECIAL_STRING_IDS = 2; + +//ids of built-in strings +enum EvaluableNodeBuiltInStringId +{ + ENBISI_NOT_A_STRING = 0, + ENBISI_EMPTY_STRING = 1, + + //leave space for ENT_ opcodes, start at the end + + //built-in common values + ENBISI_nas = NUM_VALID_ENT_OPCODES + NUM_ENBISI_SPECIAL_STRING_IDS, + ENBISI_nan, + ENBISI_infinity, + ENBISI_neg_infinity, + ENBISI_zero, + ENBISI_one, + ENBISI_neg_one, + ENBISI_empty_null, + ENBISI_empty_list, + ENBISI_empty_assoc, + ENBISI_empty_true, + ENBISI_empty_false, + + //config file parameters + ENBISI_rand_seed, + + //substr parameters + ENBISI_all, + ENBISI_submatches, + + //dynamically generated function parameters + ENBISI__, + ENBISI_new_entity, + + //entity access parameters + ENBISI_accessing_entity, + + //distance types + ENBISI_nominal, + ENBISI_continuous, + ENBISI_cyclic, + //ENBISI_string, //string is already covered + ENBISI_code, + + //distance parameter values + ENBISI_surprisal_to_prob, + + //numerical precision types + ENBISI_precise, + ENBISI_fast, + ENBISI_recompute_precise, + + //format opcode types + ENBISI_Base16, + ENBISI_Base64, + ENBISI_int8, + ENBISI_uint8, + ENBISI_int16, + ENBISI_uint16, + ENBISI_int32, + ENBISI_uint32, + ENBISI_int64, + ENBISI_uint64, + ENBISI_float, + ENBISI_double, + ENBISI_INT8, + ENBISI_UINT8, + ENBISI_INT16, + ENBISI_UINT16, + ENBISI_INT32, + ENBISI_UINT32, + ENBISI_INT64, + ENBISI_UINT64, + ENBISI_FLOAT, + ENBISI_DOUBLE, + ENBISI_json, + ENBISI_yaml, + + //format opcode params + ENBISI_sort_keys, + ENBISI_locale, + ENBISI_timezone, + + //mutate opcode mutation types + ENBISI_change_type, + ENBISI_delete, + ENBISI_insert, + ENBISI_swap_elements, + ENBISI_deep_copy_elements, + ENBISI_delete_elements, + ENBISI_change_label, + + //enumeration of the first string that isn't static + ENBISI_FIRST_DYNAMIC_STRING +}; + + +//returns the string id representing EvaluableNodeType t +constexpr StringInternPool::StringID GetStringIdFromNodeTypeFromString(EvaluableNodeType t) +{ + if(t >= NUM_VALID_ENT_OPCODES) + return ENT_NOT_A_BUILT_IN_TYPE; + return static_cast(t + NUM_ENBISI_SPECIAL_STRING_IDS); +} + +//like GetEvaluableNodeTypeFromString but uses a string id +constexpr EvaluableNodeType GetEvaluableNodeTypeFromStringId(StringInternPool::StringID sid) +{ + if(sid <= ENBISI_EMPTY_STRING) + return ENT_NOT_A_BUILT_IN_TYPE; + + size_t type_index = sid - NUM_ENBISI_SPECIAL_STRING_IDS; + if(type_index >= NUM_VALID_ENT_OPCODES) + return ENT_NOT_A_BUILT_IN_TYPE; + + return static_cast(type_index); +} + +//returns a string of the enumerated type specified +// if get_non_keywords is true, then it will return types that are not necessarily keywords, like number +inline std::string GetStringFromEvaluableNodeType(EvaluableNodeType t, bool get_non_keywords = false) +{ + if(!get_non_keywords && IsEvaluableNodeTypeImmediate(t)) + return ""; + + if(t 
>= NUM_VALID_ENT_OPCODES) + { + assert(false); + return ""; + } + + return string_intern_pool.GetStringFromID(GetStringIdFromNodeTypeFromString(t)); +} + +//returns the enumerated type for the string +// if get_non_keywords is true, then it will return types that are not necessarily keywords, like number +inline EvaluableNodeType GetEvaluableNodeTypeFromString(const std::string &s, bool get_non_keywords = false) +{ + auto sid = string_intern_pool.GetIDFromString(s); + if(sid == string_intern_pool.NOT_A_STRING_ID) + return ENT_NOT_A_BUILT_IN_TYPE; + + return GetEvaluableNodeTypeFromStringId(sid); +} diff --git a/src/Amalgam/Parser.cpp b/src/Amalgam/Parser.cpp new file mode 100644 index 00000000..36975d29 --- /dev/null +++ b/src/Amalgam/Parser.cpp @@ -0,0 +1,1056 @@ +//project headers: +#include "Parser.h" + +#include "EvaluableNode.h" +#include "EvaluableNodeTreeFunctions.h" +#include "StringManipulation.h" + +//system headers: +#include +#include + +Parser::Parser() +{ + pos = 0; + lineNumber = 0; + lineStartPos = 0; +} + +std::string Parser::Backslashify(const std::string &s) +{ + if(s.size() == 0) + return std::string(); + + //copy into string b, Backslashifying + std::string b; + //give it two extra characters, the worst highly likely case for needing backslashes (e.g., surrounded by quotes) + b.reserve(s.size() + 2); + for(auto c : s) + { + switch(c) + { + case '\0': + b.append("\\0"); + break; + case '\\': + b.append("\\\\"); + break; + case '"': + b.append("\\\""); + break; + case '\t': + b.append("\\t"); + break; + case '\n': + b.append("\\n"); + break; + case '\r': + b.append("\\r"); + break; + default: + b.push_back(c); + break; + } + } + + return b; +} + +EvaluableNodeReference Parser::Parse(std::string &code_string, EvaluableNodeManager *enm, std::string *original_source) +{ + Parser pt; + pt.code = &code_string; + pt.pos = 0; + pt.preevaluationNodes.clear(); + pt.evaluableNodeManager = enm; + + pt.originalSource = ""; + if(original_source != nullptr) + { + //convert source to minimal absolute path + std::filesystem::path p = *original_source; + try + { + pt.originalSource = std::filesystem::canonical(p).string(); + } + catch(std::filesystem::filesystem_error &e) + { + //file doesn't exist + pt.originalSource = e.what(); + } + } + + EvaluableNode *parse_tree = pt.ParseNextBlock(); + + pt.PreevaluateNodes(); + EvaluableNodeManager::UpdateFlagsForNodeTree(parse_tree); + + return EvaluableNodeReference(parse_tree, true); +} + +std::string Parser::Unparse(EvaluableNode *tree, EvaluableNodeManager *enm, + bool expanded_whitespace, bool emit_attributes, bool sort_keys) +{ + UnparseData upd; + upd.enm = enm; + //if the top node needs cycle checks, then need to check all nodes in case there are + // multiple ways to get to one + upd.cycleFree = (tree == nullptr || !tree->GetNeedCycleCheck()); + upd.preevaluationNeeded = false; + upd.emitAttributes = emit_attributes; + upd.sortKeys = sort_keys; + Unparse(upd, tree, nullptr, expanded_whitespace, 0, false); + return upd.result; +} + +EvaluableNode *Parser::GetCodeForPathFromAToB(UnparseData &upd, EvaluableNode *a, EvaluableNode *b) +{ + if(a == nullptr || b == nullptr) + return nullptr; + + //climb back from current tree to top level parent; get ancestor (for comparison with b's) and how far back it is + EvaluableNode *a_ancestor = a; + EvaluableNode *a_ancestor_parent = upd.parentNodes[a_ancestor]; + int64_t a_ancestor_depth = 0; + EvaluableNode::ReferenceSetType nodes_visited; + while(a_ancestor_parent != nullptr + && a_ancestor != b 
//stop if it's the target + && nodes_visited.insert(a_ancestor_parent).second == true) //make sure not visited yet + { + //climb back up one level + a_ancestor_depth++; + a_ancestor = a_ancestor_parent; + a_ancestor_parent = upd.parentNodes[a_ancestor]; + } + + //find way from b (as previously defined) back to ancestor + EvaluableNode *b_ancestor = b; + EvaluableNode *b_ancestor_parent = upd.parentNodes[b_ancestor]; + nodes_visited.clear(); + std::vector b_path_nodes; + while(b_ancestor_parent != nullptr + && b_ancestor != a_ancestor //stop if it's the target + && nodes_visited.insert(b_ancestor_parent).second == true) //make sure not visited yet + { + + EvaluableNode *lookup = upd.enm->AllocNode(ENT_GET); + lookup->AppendOrderedChildNode(nullptr); //placeholder for the object to use when assembling chain later + + //each kind of child nodes + if(b_ancestor_parent->IsAssociativeArray()) + { + StringInternPool::StringID key_id = StringInternPool::NOT_A_STRING_ID; + auto &bap_mcn = b_ancestor_parent->GetMappedChildNodesReference(); + if(!upd.sortKeys) + { + //look up which key corresponds to the value + for(auto &[s_id, s] : b_ancestor_parent->GetMappedChildNodesReference()) + { + if(s == b_ancestor) + { + key_id = s_id; + break; + } + } + } + else //sortKeys + { + std::vector key_sids; + key_sids.reserve(bap_mcn.size()); + for(auto &[k_id, _] : bap_mcn) + key_sids.push_back(k_id); + + std::sort(begin(key_sids), end(key_sids), StringIDNaturalCompareSort); + + for(auto &key_sid : key_sids) + { + auto k = bap_mcn.find(key_sid); + if(k->second == b_ancestor) + { + key_id = k->first; + break; + } + } + } + + lookup->AppendOrderedChildNode(upd.enm->AllocNode(ENT_STRING, key_id)); + } + else if(b_ancestor_parent->IsOrderedArray()) + { + auto &bap_ocn = b_ancestor_parent->GetOrderedChildNodesReference(); + const auto &found = std::find(begin(bap_ocn), end(bap_ocn), b_ancestor); + auto index = std::distance(begin(bap_ocn), found); + lookup->AppendOrderedChildNode(upd.enm->AllocNode(static_cast(index))); + } + else //didn't work... 
odd/error condition + { + delete lookup; + return nullptr; + } + + b_path_nodes.push_back(lookup); + b_ancestor = b_ancestor_parent; + b_ancestor_parent = upd.parentNodes[b_ancestor]; + } + + //make sure common ancestor is the same (otherwise return null) + if(a_ancestor != b_ancestor) + return nullptr; + + //build code to get the reference + EvaluableNode *refpath = upd.enm->AllocNode(ENT_TARGET); + refpath->AppendOrderedChildNode(upd.enm->AllocNode(a_ancestor_depth)); + + //combine together + while(b_path_nodes.size() > 0) + { + //pull off the end of b + EvaluableNode *next = b_path_nodes.back(); + b_path_nodes.pop_back(); + + next->GetOrderedChildNodes()[0] = refpath; + refpath = next; + } + + return refpath; +} + +void Parser::SkipWhitespaceAndAccumulateAttributes(EvaluableNode *target) +{ + while(pos < code->size()) + { + //eat any whitespace + if(StringManipulation::IsUtf8Whitespace(*code, pos)) + { + if(StringManipulation::IsUtf8Newline(*code, pos)) + { + lineNumber++; + lineStartPos = pos + 1; + } + + pos++; + continue; + } + + auto cur_char = code->at(pos); + + //if it's a label, grab the label + if(cur_char == '#') + { + pos++; //skip hash + + //add to labels list + target->AppendLabel(GetNextIdentifier(true)); + + continue; + } + + //if it's a comment, grab everything until the end of line + if(cur_char == ';') + { + pos++; //skip semicolon + + //add on characters until end of line + size_t start_pos = pos; + while(pos < code->size()) + { + cur_char = code->at(pos); + if(cur_char != '\r' && cur_char != '\n') + pos++; + else + break; + } + + std::string cur_comment; + //prepend the comment with newlines if there is already a comment on the node + if(target->GetCommentsStringId() != StringInternPool::NOT_A_STRING_ID) + cur_comment = "\r\n"; + cur_comment.append(code->substr(start_pos, pos - start_pos)); + + target->AppendComments(cur_comment); + continue; + } + + //if it's a concurrent marker, set the property + if(cur_char == '|' && pos + 1 < code->size() && code->at(pos + 1) == '|') + { + pos += 2; //skip || + target->SetConcurrency(true); + continue; + } + + if(cur_char == '@') + { + pos++; //skip @ + preevaluationNodes.push_back(target); + continue; + } + + //not caught, so exit + break; + } + + //if labeling source, prepend as comment + if(originalSource.size() > 0) + { + std::string new_comment = sourceCommentPrefix; + new_comment += std::to_string(lineNumber); + new_comment += ' '; + + std::string_view line_to_opcode(&(*code)[lineStartPos], pos - lineStartPos); + size_t column_number = StringManipulation::GetNumUTF8Characters(line_to_opcode); + + new_comment += std::to_string(column_number); + new_comment += ' '; + new_comment += originalSource; + new_comment += "\r\n"; + if(target->HasComments()) + new_comment += target->GetCommentsString(); + target->SetComments(new_comment); + } +} + +std::string Parser::ParseString() +{ + pos++; + + std::string s; + while(pos < code->size()) + { + auto cur_char = code->at(pos); + + if(cur_char == '"') + break; + + if(cur_char != '\\') + { + s.push_back(cur_char); + pos++; + } + else //escaped character + { + pos++; + if(pos < code->size()) + { + cur_char = code->at(pos); + switch(cur_char) + { + case '0': + s.push_back('\0'); + break; + case '"': + s.push_back('"'); + break; + case 't': + s.push_back('\t'); + break; + case 'n': + s.push_back('\n'); + break; + case 'r': + s.push_back('\r'); + break; + default: + s.push_back(cur_char); + break; + } + } + pos++; + } + } + + pos++; //skip last double quote + return s; +} + +void 
Parser::SkipToEndOfIdentifier(bool allow_leading_label_marks) +{ + //eat any label marks + if(allow_leading_label_marks) + { + while(pos < code->size() && code->at(pos) == '#') + pos++; + } + + //eat all characters until one that indicates end of identifier + while(pos < code->size()) + { + auto cur_char = code->at(pos); + if(!std::isspace(static_cast(cur_char)) + && cur_char != ')' + && cur_char != '(' + && cur_char != '#' + && cur_char != ';') + pos++; + else + break; + } +} + +std::string Parser::GetNextIdentifier(bool allow_leading_label_marks) +{ + if(pos >= code->size()) + return std::string(); + + //if quoted string, then go until the next end quote + if(code->at(pos) == '"') + return ParseString(); + else + { + size_t start_pos = pos; + SkipToEndOfIdentifier(allow_leading_label_marks); + return code->substr(start_pos, pos - start_pos); + } +} + +EvaluableNode *Parser::GetNextToken(EvaluableNode *new_token) +{ + if(new_token == nullptr) + new_token = evaluableNodeManager->AllocNode(ENT_NULL); + + SkipWhitespaceAndAccumulateAttributes(new_token); + if(pos >= code->size()) + { + FreeNode(new_token); + return nullptr; + } + + auto cur_char = code->at(pos); + + if(cur_char == '(') //identifier as command + { + pos++; + SkipWhitespaceAndAccumulateAttributes(new_token); + if(pos >= code->size()) + { + FreeNode(new_token); + return nullptr; + } + + std::string token = GetNextIdentifier(); + //first see if it's a keyword + new_token->SetType(GetEvaluableNodeTypeFromString(token), evaluableNodeManager, false); + if(IsEvaluableNodeTypeValid(new_token->GetType())) + return new_token; + + //unspecified command, store the identifier in the string + new_token->SetType(ENT_STRING, evaluableNodeManager, false); + new_token->SetStringValue(token); + return new_token; + } + else if(cur_char == ')') + { + pos++; //skip closing parenthesis + FreeNode(new_token); + return nullptr; + } + else if(std::isdigit(static_cast(cur_char)) || cur_char == '-' || cur_char == '.') + { + size_t start_pos = pos; + SkipToEndOfIdentifier(); + std::string s = code->substr(start_pos, pos - start_pos); + + //check for special values + double value = 0.0; + if(s == ".nas") + { + new_token->SetType(ENT_STRING, evaluableNodeManager, false); + new_token->SetStringID(StringInternPool::NOT_A_STRING_ID); + return new_token; + } + if(s == ".infinity") + value = std::numeric_limits::infinity(); + else if(s == "-.infinity") + value = -std::numeric_limits::infinity(); + else if(s == ".nan") + value = std::numeric_limits::quiet_NaN(); + else + { + auto [converted_value, success] = Platform_StringToNumber(s); + if(success) + value = converted_value; + } + + new_token->SetType(ENT_NUMBER, evaluableNodeManager, false); + new_token->SetNumberValue(value); + return new_token; + } + else if(cur_char == '"') + { + new_token->SetType(ENT_STRING, evaluableNodeManager, false); + new_token->SetStringValue(ParseString()); + return new_token; + } + else //identifier + { + //store the identifier + new_token->SetType(ENT_SYMBOL, evaluableNodeManager, false); + new_token->SetStringValue(GetNextIdentifier()); + return new_token; + } +} + +void Parser::FreeNode(EvaluableNode *node) +{ + evaluableNodeManager->FreeNode(node); + if(preevaluationNodes.size() > 0 && preevaluationNodes.back() == node) + preevaluationNodes.pop_back(); +} + +EvaluableNode *Parser::ParseNextBlock() +{ + EvaluableNode *tree_top = nullptr; + EvaluableNode *curnode = nullptr; + + //as long as code left + while(pos < code->size()) + { + EvaluableNode *n = GetNextToken(); + + //if 
end of a list + if(n == nullptr) + { + //nothing here at all + if(curnode == nullptr) + return nullptr; + + const auto &parent = parentNodes.find(curnode); + + //if no parent, then all finished + if(parent == end(parentNodes) || parent->second == nullptr) + return tree_top; + + //jump up to the parent node + curnode = parent->second; + continue; + } + else //got some token + { + //if it's the first token, then put it up top + if(tree_top == nullptr) + { + tree_top = n; + curnode = n; + continue; + } + + if(curnode->IsOrderedArray()) + { + curnode->AppendOrderedChildNode(n); + } + else if(curnode->IsAssociativeArray()) + { + //n is the id, so need to get the next token + StringInternPool::StringID index_sid = EvaluableNode::ToStringIDTakingReferenceAndClearing(n); + + //reset the node type but continue to accumulate any attributes + n->SetType(ENT_NULL, evaluableNodeManager, false); + n = GetNextToken(n); + curnode->SetMappedChildNodeWithReferenceHandoff(index_sid, n, true); + + //handle case if uneven number of arguments + if(n == nullptr) + { + //nothing here at all + if(curnode == nullptr) + return nullptr; + + const auto &parent = parentNodes.find(curnode); + + //if no parent, then all finished + if(parent == end(parentNodes) || parent->second == nullptr) + return tree_top; + + //jump up to the parent node + curnode = parent->second; + continue; + } + } + + parentNodes[n] = curnode; + + //if it's not immediate, then descend into that part of the tree, resetting parent index counter + if(!IsEvaluableNodeTypeImmediate(n->GetType())) + curnode = n; + + //if specifying something unusual, then assume it's just a null + if(n->GetType() == ENT_NOT_A_BUILT_IN_TYPE) + n->SetType(ENT_NULL, evaluableNodeManager); + } + + } + + return tree_top; +} + +void Parser::AppendComments(EvaluableNode *n, size_t indentation_depth, bool pretty, std::string &to_append) +{ + const auto comment_lines = n->GetCommentsSeparateLines(); + +#ifdef DEBUG_PARSER_PRINT_FLAGS + //prints out extra comments for debugging + if(n->GetIsIdempotent() || n->GetNeedCycleCheck()) + { + if(indentation_depth > 0 && pretty) + AppendNewlineWithIndentation(to_append, indentation_depth, pretty); + + //add comment sign + to_append.push_back(';'); + if(n->GetIsIdempotent()) + to_append.append("idempotent "); + if(n->GetNeedCycleCheck()) + to_append.append("need_cycle_check "); + + if(pretty) + AppendNewlineWithIndentation(to_append, indentation_depth, pretty); + else //need to end a comment with a newline even if not pretty + to_append.append("\r\n"); + } +#endif + + if(comment_lines.size() == 0) + return; + + //if not start of file, make sure there's an extra newline before the comments + if(indentation_depth > 0 && pretty) + AppendNewlineWithIndentation(to_append, indentation_depth, pretty); + + for(auto &comment : comment_lines) + { + //add comment sign + to_append.push_back(';'); + to_append.append(comment); + + if(pretty) + AppendNewlineWithIndentation(to_append, indentation_depth, pretty); + else //need to end a comment with a newline even if not pretty + to_append.append("\r\n"); + } +} + +//if the string contains a character that needs to be escaped for labels, then will convert +std::string ConvertLabelToQuotedStringIfNecessary(const std::string &s) +{ + bool needs_escape = false; + + //check for any characters that need to be escaped + if(s.find_first_of(" \t\"\n\r") != std::string::npos) + needs_escape = true; + + if(!needs_escape) + { + //if the whole thing starts with #'s, then it's fine + // but if it has #'s and then 
something else, then another #, then it needs to be escaped + size_t last_hash_pos = s.find_last_of('#'); + if(last_hash_pos != std::string::npos) + { + //get all #'s at the front + size_t num_starting_hashes = 0; + while(s[num_starting_hashes] == '#') + num_starting_hashes++; + + //if the position after the last starting hash is the same as the last hash, then don't transform the string + if(num_starting_hashes - 1 != last_hash_pos) + needs_escape = true; + } + } + + if(!needs_escape) + return s; + + //need to quote and escape the string + std::string result; + result.push_back('"'); + + if(Parser::NeedsBackslashify(s)) + result.append(Parser::Backslashify(s)); + else + result.append(s); + + result.push_back('"'); + return result; +} + +void Parser::AppendLabels(UnparseData &upd, EvaluableNode *n, size_t indentation_depth, bool pretty) +{ + size_t num_labels = n->GetNumLabels(); + for(size_t i = 0; i < num_labels; i++) + { + //add label sign + upd.result.push_back('#'); + upd.result.append(ConvertLabelToQuotedStringIfNecessary(n->GetLabel(i))); + + //if not the last label, then separate via spaces + if(i + 1 < num_labels || !pretty) + upd.result.push_back(' '); + else //last label and pretty printing + { + //if just an immediate or no child nodes, then separate with space + if(IsEvaluableNodeTypeImmediate(n->GetType()) || n->GetNumChildNodes() == 0) + upd.result.push_back(' '); + else //something more elaborate, put newline and reindent + AppendNewlineWithIndentation(upd.result, indentation_depth, pretty); + } + } +} + +void Parser::AppendAssocKeyValuePair(UnparseData &upd, StringInternPool::StringID key_sid, EvaluableNode *n, EvaluableNode *parent, + bool expanded_whitespace, size_t indentation_depth) +{ + if(expanded_whitespace) + { + for(size_t i = 0; i < indentation_depth; i++) + upd.result.push_back(indentationCharacter); + } + else + upd.result.push_back(' '); + + const std::string &key_str = string_intern_pool.GetStringFromID(key_sid); + + //surround in quotes only if needed + if(key_sid != string_intern_pool.NOT_A_STRING_ID + && HasCharactersBeyondIdentifier(key_str)) + { + upd.result.push_back('"'); + upd.result.append(Backslashify(key_str)); + upd.result.push_back('"'); + } + else + { + upd.result.append(key_str); + } + + //space between key and value + upd.result.push_back(' '); + + Unparse(upd, n, parent, expanded_whitespace, indentation_depth + 1, false); +} + +void Parser::Unparse(UnparseData &upd, EvaluableNode *tree, EvaluableNode *parent, bool expanded_whitespace, size_t indentation_depth, bool need_initial_indent) +{ + //if need to check for circular references, + // can skip if nullptr, as the code below this will handle nullptr and apply appropriate spacing + if(!upd.cycleFree && tree != nullptr) + { + //keep track of what was visited + auto [_, inserted] = upd.parentNodes.insert(std::make_pair(tree, parent)); + + //if code already referenced, then print path to it + if(!inserted) + { + upd.preevaluationNeeded = true; + + EvaluableNode *code_to_print = GetCodeForPathFromAToB(upd, parent, tree); + //unparse the path using a new set of parentNodes as to not pollute the one currently being unparsed + EvaluableNode::ReferenceAssocType references; + std::swap(upd.parentNodes, references); + Unparse(upd, code_to_print, nullptr, expanded_whitespace, indentation_depth, need_initial_indent); + std::swap(upd.parentNodes, references); //put the old parentNodes back + upd.enm->FreeNodeTree(code_to_print); + + return; + } + } + + //add indentation + if(expanded_whitespace && 
need_initial_indent) + { + for(size_t i = 0; i < indentation_depth; i++) + upd.result.push_back(indentationCharacter); + } + + if(tree == nullptr) + { + upd.result.append(expanded_whitespace ? "(null)\r\n" : "(null)"); + return; + } + + //if already hit this node, then need to create code to rebuild the circular reference + + //add to check for circular references + upd.parentNodes[tree] = parent; + + if(upd.emitAttributes) + { + AppendComments(tree, indentation_depth, expanded_whitespace, upd.result); + AppendLabels(upd, tree, indentation_depth, expanded_whitespace); + + if(tree->GetConcurrency() == true) + upd.result.append("||"); + + //emit an @ to indicate that it needs to be translated into a map or is some other preevaluation + if(upd.preevaluationNeeded) + { + upd.result.push_back('@'); + upd.preevaluationNeeded = false; + } + } + + //check if it's an immediate/variable before deciding whether to surround with parenthesis + if(IsEvaluableNodeTypeImmediate(tree->GetType())) + { + switch(tree->GetType()) + { + case ENT_NUMBER: + upd.result.append(EvaluableNode::ToString(tree)); + break; + case ENT_STRING: + { + auto sid = tree->GetStringIDReference(); + if(sid == string_intern_pool.NOT_A_STRING_ID) + { + upd.result.append(".nas"); + } + else //legitimate string + { + upd.result.push_back('"'); + + auto &s = tree->GetStringValue(); + if(NeedsBackslashify(s)) + upd.result.append(Backslashify(s)); + else + upd.result.append(s); + + upd.result.push_back('"'); + } + break; + } + case ENT_SYMBOL: + upd.result.append(tree->GetStringValue()); + break; + default: + break; + } + if(expanded_whitespace) + upd.result.append("\r\n"); + } + else + { + //emit opcode + upd.result.push_back('('); + upd.result.append(GetStringFromEvaluableNodeType(tree->GetType())); + + bool recurse_expanded_whitespace = expanded_whitespace; + if(expanded_whitespace) + { + //if small enough, just inline + auto &ocn = tree->GetOrderedChildNodes(); + auto &mcn = tree->GetMappedChildNodes(); + + //need to double count mapped child nodes because of keys + size_t num_child_nodes = ocn.size() + 2 * mcn.size(); + if(num_child_nodes == 0) + { + recurse_expanded_whitespace = false; + } + else if(num_child_nodes <= 6 && num_child_nodes + indentation_depth < 14) + { + //make sure all child nodes are leaf nodes and have no metadata + bool all_leaf_nodes = true; + for(auto cn : ocn) + { + if(cn != nullptr && (cn->GetNumChildNodes() > 0 + || cn->GetCommentsStringId() != StringInternPool::NOT_A_STRING_ID || cn->GetNumLabels() > 0)) + { + all_leaf_nodes = false; + break; + } + } + + for(auto &[_, cn] : mcn) + { + //need to count the additional node for the string index + if(cn != nullptr && (cn->GetNumChildNodes() > 0 + || cn->GetCommentsStringId() != StringInternPool::NOT_A_STRING_ID || cn->GetNumLabels() > 0)) + { + all_leaf_nodes = false; + break; + } + } + + if(all_leaf_nodes) + recurse_expanded_whitespace = false; + } + + //if expanding out further, add extra whitespace + if(recurse_expanded_whitespace) + upd.result.append("\r\n"); + } + + if(tree->IsAssociativeArray()) + { + auto &tree_mcn = tree->GetMappedChildNodesReference(); + if(!upd.sortKeys) + { + for(auto &[k_id, k] : tree_mcn) + AppendAssocKeyValuePair(upd, k_id, k, tree, recurse_expanded_whitespace, indentation_depth + 1); + } + else //sortKeys + { + std::vector key_sids; + key_sids.reserve(tree_mcn.size()); + for(auto &[k_id, _] : tree_mcn) + key_sids.push_back(k_id); + + std::sort(begin(key_sids), end(key_sids), StringIDNaturalCompareSort); + + for(auto &key_sid : 
key_sids) + { + auto k = tree_mcn.find(key_sid); + AppendAssocKeyValuePair(upd, k->first, k->second, tree, recurse_expanded_whitespace, indentation_depth + 1); + } + } + } + else if(tree->IsOrderedArray()) + { + if(recurse_expanded_whitespace) + { + for(auto &e : tree->GetOrderedChildNodesReference()) + Unparse(upd, e, tree, true, indentation_depth + 1, true); + } + else //expanded whitespace + { + for(auto &e : tree->GetOrderedChildNodesReference()) + { + upd.result.push_back(' '); + Unparse(upd, e, tree, false, indentation_depth + 1, true); + } + } + } + + //add closing parenthesis + if(expanded_whitespace) + { + //indent if appropriate + if(recurse_expanded_whitespace) + { + for(size_t i = 0; i < indentation_depth; i++) + upd.result.push_back(indentationCharacter); + } + upd.result.append(")\r\n"); + } + else + { + upd.result.append(")"); + } + } +} + +EvaluableNode *Parser::GetNodeFromRelativeCodePath(EvaluableNode *path) +{ + if(path == nullptr) + return nullptr; + + //traverse based on type + switch(path->GetType()) + { + + case ENT_GET: + { + if(path->GetOrderedChildNodes().size() < 2) + return nullptr; + + EvaluableNode *result = GetNodeFromRelativeCodePath(path->GetOrderedChildNodes()[0]); + if(result == nullptr) + return result; + EvaluableNode *index_node = path->GetOrderedChildNodes()[1]; + if(index_node == nullptr) + return nullptr; + + //if it's an assoc, then treat the index as a string + if(result->GetMappedChildNodes().size() > 0) + { + StringInternPool::StringID index_sid = EvaluableNode::ToStringIDIfExists(index_node); + EvaluableNode **found = result->GetMappedChildNode(index_sid); + if(found != nullptr) + return *found; + return nullptr; + } + + //otherwise treat the index as a number for a list + size_t index = static_cast(EvaluableNode::ToNumber(index_node)); + if(result->GetOrderedChildNodes().size() > index) + return result->GetOrderedChildNodes()[index]; + } + + case ENT_TARGET: + { + //first parameter is the number of steps to crawl up in the parent tree + size_t steps_up = 0; + if(path->GetOrderedChildNodes().size() > 0) + steps_up = static_cast(EvaluableNode::ToNumber(path->GetOrderedChildNodes()[0])); + + //at least need to go up one step + steps_up++; + + //crawl up parse tree + EvaluableNode *result = path; + while(steps_up > 0 && result != nullptr) + { + auto found = parentNodes.find(result); + if(found != end(parentNodes)) + result = found->second; + else + result = nullptr; + } + } + + default: + return nullptr; + } + + return nullptr; +} + +void Parser::PreevaluateNodes() +{ + for(auto &n : preevaluationNodes) + { + if(n == nullptr) + continue; + + auto node_type = n->GetType(); + if(node_type == ENT_GET || node_type == ENT_TARGET) + { + EvaluableNode *target = GetNodeFromRelativeCodePath(n); + + //transform the target to a location relative to the target's parent + EvaluableNode *parent = nullptr; + if(target == nullptr) + continue; + parent = parentNodes[target]; + if(parent == nullptr) + continue; + + //copy reference of target to the parent's index of the target + if(parent->IsAssociativeArray()) + { + for(auto &[_, cn] : parent->GetMappedChildNodesReference()) + { + if(cn == n) + { + cn = target; + break; + } + } + } + else if(parent->IsOrderedArray()) + { + for(auto &cn : parent->GetOrderedChildNodesReference()) + { + if(cn == n) + { + cn = target; + break; + } + } + } + + //mark both the originals' parents and the new parents as both cyclic + EvaluableNode::SetParentEvaluableNodesCycleChecks(parentNodes[n], parentNodes); + 
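The unparser above, together with PreevaluateNodes, is what lets a parsed tree round-trip through text: assoc keys can be re-sorted when sort_keys is set, and nodes reached more than once are emitted as @-marked (get (target) ...) paths that PreevaluateNodes later resolves back into shared references. A minimal sketch of driving the public entry points declared in Parser.h below; the Amalgam source string is illustrative, and it assumes EvaluableNodeManager is default-constructible and that EvaluableNodeReference converts to the EvaluableNode* that Unparse expects, as it is used elsewhere in the codebase.

	#include "Parser.h"
	#include "EvaluableNodeManagement.h"
	#include <iostream>
	#include <string>

	int main()
	{
		EvaluableNodeManager enm;
		std::string code = "(assoc b 2 a 1)";

		//parse the source into a tree owned by enm
		EvaluableNodeReference tree = Parser::Parse(code, &enm);

		//unparse with pretty whitespace, attributes, and keys sorted
		//(assumes EvaluableNodeReference converts to EvaluableNode *)
		std::string round_tripped = Parser::Unparse(tree, &enm,
			/*expanded_whitespace*/ true, /*emit_attributes*/ true, /*sort_keys*/ true);

		std::cout << round_tripped;
		return 0;
	}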
EvaluableNode::SetParentEvaluableNodesCycleChecks(parent, parentNodes); + + continue; + } + } +} diff --git a/src/Amalgam/Parser.h b/src/Amalgam/Parser.h new file mode 100644 index 00000000..4224dfbf --- /dev/null +++ b/src/Amalgam/Parser.h @@ -0,0 +1,203 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" + +//system headers: +#include +#include +#include + +class Parser +{ +public: + Parser(); + + //returns true if the string needs to be backslashified + inline static bool NeedsBackslashify(const std::string &s) + { + for(auto c : s) + { + switch(c) + { + case '\0': + case '\\': + case '"': + case '\t': + case '\n': + case '\r': + return true; + + default: + break; + } + } + + return false; + } + + //returns true if the string needs to be backslashified, has spaces, or has special characters + inline static bool HasCharactersBeyondIdentifier(const std::string &s) + { + for(auto c : s) + { + switch(c) + { + case '\0': + case '\\': + case '"': + case '\t': + case '\n': + case '\r': + case ' ': + case '(': + case ')': + case '.': + case '#': + case '@': + case ';': + return true; + + default: + break; + } + } + + return false; + } + + //Returns a properly backslashified string + static std::string Backslashify(const std::string &s); + + //appends a newline to s and indents the newline the required amount + static inline void AppendNewlineWithIndentation(std::string &s, size_t indentation_depth, bool pretty) + { + if(pretty) + { + s.append("\r\n"); + for(size_t i = 0; i < indentation_depth; i++) + s.push_back(indentationCharacter); + } + else + s.push_back(' '); + } + + //Parses the code string and returns a tree of EvaluableNodeReference that represents the code + //if original_source a valid string, will prepend each node with a comment indicating original source + static EvaluableNodeReference Parse(std::string &code_string, EvaluableNodeManager *enm, std::string *original_source = nullptr); + + //Returns a string that represents the tree + // if expanded_whitespace, will emit additional whitespace to make it easier to read + // if emit_attributes, then it will emit comments, labels, concurrency, preevaluations, etc.; if emit_attributes is false, then it will only emit values + // if sort_keys, then it will perform a sort on all unordered nodes + static std::string Unparse(EvaluableNode *tree, EvaluableNodeManager *enm, + bool expanded_whitespace = true, bool emit_attributes = true, bool sort_keys = false); + + //prefix used in the comments when attributing sources to EvaluableNodes + inline static const std::string sourceCommentPrefix = "src: "; + +protected: + + //data passed down through recursion with UnparseData + class UnparseData + { + public: + //result string + std::string result; + + //parentNodes contains each reference as the key and the parent as the value + EvaluableNode::ReferenceAssocType parentNodes; + + EvaluableNodeManager *enm; + + //if true, then the tree is cycle free and don't need to keep track of potential circular references + bool cycleFree; + + //if true, then should be marked for preevaluation + bool preevaluationNeeded; + + //if true, then emit comments, labels, concurrency, preevaluations, etc. + bool emitAttributes; + + //if true, then it will perform a sort on all unordered nodes + bool sortKeys; + }; + + //Returns code that will get from location a to b. 
+ static EvaluableNode *GetCodeForPathFromAToB(UnparseData &upd, EvaluableNode *a, EvaluableNode *b); + + //Skips whitespace and accumulates any attributes (e.g., labels, comments) on to target + void SkipWhitespaceAndAccumulateAttributes(EvaluableNode *target); + + //Parses until the end of the quoted string, updating the position and returns the string with interpreted characters + std::string ParseString(); + + //Skips non-whitespace, non-parenthesis, and non-label markers, non-comment begin, etc. + // if allow_leading_label_marks is true, then it will not end on label marks (#) at the beginning of the string + void SkipToEndOfIdentifier(bool allow_leading_label_marks = false); + + //Advances position and returns the current identifier + // if allow_leading_label_marks is true, then it will not end on label marks (#) at the beginning of the string + std::string GetNextIdentifier(bool allow_leading_label_marks = false); + + //Returns a EvaluableNode containing the next token, null if none left in current context + //if new_token is not nullptr, it will put the token in the EvaluableNode provided, otherwise will return a new one + EvaluableNode *GetNextToken(EvaluableNode *new_token = nullptr); + + //deallocates the current node in case there is an early exit or error + void FreeNode(EvaluableNode *node); + + //Parses the next block of code, then returns the block + EvaluableNode *ParseNextBlock(); + + //Prints out all comments for the respective node + static void AppendComments(EvaluableNode *n, size_t indentation_depth, bool pretty, std::string &to_append); + + //Prints out all labels for the respective node. If omit_label is not null, it will not print any label that matches it + static void AppendLabels(UnparseData &upd, EvaluableNode *n, size_t indentation_depth, bool pretty); + + //Prints out key and its associated node n + static void AppendAssocKeyValuePair(UnparseData &upd, + StringInternPool::StringID key_sid, EvaluableNode *n, EvaluableNode *parent, + bool expanded_whitespace, size_t indentation_depth); + + //Appends to the string s that represents the code tree + //if expanded_whitespace, then it will add whitespace as appropriate to make it pretty + // each line is additionally indented by the number of spaces specified + // if need_initial_indent is true, then it will perform an indentation before generating the first code, + // otherwise, will assume the indentation is already where it should be + static void Unparse(UnparseData &upd, EvaluableNode *tree, EvaluableNode *parent, bool expanded_whitespace, size_t indentation_depth, bool need_initial_indent); + + //given a path starting at path's parent, parses the path and returns the target location + EvaluableNode *GetNodeFromRelativeCodePath(EvaluableNode *path); + + //resolves any nodes that require preevaluation (such as assocs or circular references) + void PreevaluateNodes(); + + //Pointer to code currently being parsed + std::string *code; + + //Position of the code currently being parsed + size_t pos; + + //Current line number + size_t lineNumber; + + //Position at the start of the current line + size_t lineStartPos; + + //Original source (e.g., file if applicable) + std::string originalSource; + + //contains a list of nodes that need to be preevaluated on parsing + std::vector preevaluationNodes; + + //parentNodes contains each reference as the key and the parent as the value + EvaluableNode::ReferenceAssocType parentNodes; + + EvaluableNodeManager *evaluableNodeManager; + + //character used for indendation + static 
const char indentationCharacter = '\t'; +}; diff --git a/src/Amalgam/PartialSum.h b/src/Amalgam/PartialSum.h new file mode 100644 index 00000000..38a7808c --- /dev/null +++ b/src/Amalgam/PartialSum.h @@ -0,0 +1,222 @@ +//project headers: +#include "FastMath.h" +#include "PlatformSpecific.h" + +//system headers: +#include + +//Class to store, accumulate, and merge/complete summations efficiently +class PartialSumCollection +{ +public: + //union of the two types of data stored to reduce need for reinterpret_cast + union SumOrMaskBucket + { + uint64_t mask; + double sum; + }; + + PartialSumCollection() + { + numDimensions = 0; + numInstances = 0; + numMaskBuckets = 1; + } + + //defined to keep compatibility with stl containers + using value_type = size_t; + + //iterator for walking along which partial sums have been filled in + struct Iterator + { + __forceinline Iterator(size_t _index, size_t _bit, SumOrMaskBucket *value_location) + : index(_index), valueLocation(value_location) + { } + + __forceinline Iterator operator =(const Iterator &other) + { + index = other.index; + valueLocation = other.valueLocation; + return *this; + } + + __forceinline bool operator ==(const Iterator &other) + { + return index == other.index; + } + + __forceinline bool operator !=(const Iterator &other) + { + return index != other.index; + } + + __forceinline Iterator &operator ++() + { + index++; + return *this; + } + + //dereference operator + __forceinline size_t operator *() + { + return index; + } + + //returns true if current bit is set + __forceinline bool IsIndexComputed() + { + size_t bit = (index % 64); + size_t offset = (index / 64); + return ( (valueLocation + offset)->mask & (1ULL << bit)); + } + + size_t index; + + //pointer to current value + SumOrMaskBucket *valueLocation; + }; + + //clears all data in the collection + void clear() + { + for(auto &v : buffer) + v.mask = 0; + numDimensions = 0; + numInstances = 0; + numMaskBuckets = 1; + } + + //resizes the buffer to accomodate the dimensions and instances specified and clears all data + void ResizeAndClear(size_t num_dimensions, size_t num_instances) + { + numDimensions = num_dimensions; + numInstances = num_instances; + //need a SumOrFeatureMask for each of up to 64 dimensions + numMaskBuckets = ((num_dimensions + 63) / 64); + + bucketStride = numMaskBuckets + 1; + + //need one value for the sum and enough values to hold a bit per dimension + //round up number of dimensions used + //unions are automatically defaulted to zero for all of their attributes + buffer.clear(); + buffer.resize(bucketStride * num_instances); + } + + //finds the bucket's bit for the specified index + static __forceinline size_t GetBucketBitForIndex(size_t index) + { + return 1ULL << (index % 64); + } + + //finds the bucket that contains the index + static __forceinline size_t GetBucketForIndex(size_t index) + { + return index / 64 + 1; + } + + //returns the bucket and bit for the specified dimension + static __forceinline std::pair GetAccumLocation(size_t dimension_index) + { + return std::make_pair(GetBucketForIndex(dimension_index), GetBucketBitForIndex(dimension_index)); + } + + //accumulates the specified value into the value specified by partial_sum_index + // for the accum_location provided by GetAccumLocation + __forceinline void Accum(size_t partial_sum_index, const std::pair accum_location, double value) + { + size_t bucket_offset = bucketStride * partial_sum_index; + buffer[bucket_offset].sum += value; + + buffer[bucket_offset + accum_location.first].mask |= 
accum_location.second; + } + + //accumulates the value of zero into the value specified by partial_sum_index + // for the accum_location provided by GetAccumLocation + //just like Accum, but faster if the value is zero + __forceinline void AccumZero(size_t partial_sum_index, const std::pair accum_location) + { + size_t bucket_offset = bucketStride * partial_sum_index; + + buffer[bucket_offset + accum_location.first].mask |= accum_location.second; + } + + //gets the number of populated buckets of the sum of index partial_sum_index + __forceinline size_t GetNumFilled(size_t partial_sum_index) + { + size_t start_offset = bucketStride * partial_sum_index + 1; + size_t end_offset = start_offset + numMaskBuckets; + + size_t num_set = 0; + for(size_t offset = start_offset; offset < end_offset; offset++) + num_set += __popcnt64(buffer[offset].mask); + return num_set; + } + + //gets the sum for the specified partial_sum_index + __forceinline double GetSum(size_t partial_sum_index) + { + size_t bucket_offset = bucketStride * partial_sum_index; + return buffer[bucket_offset].sum; + } + + //performs both GetNumFilled and GetSum in one call + __forceinline std::pair GetNumFilledAndSum(size_t partial_sum_index) + { + size_t bucket_offset = bucketStride * partial_sum_index; + double sum = buffer[bucket_offset].sum; + + size_t start_offset = bucket_offset + 1; + size_t end_offset = start_offset + numMaskBuckets; + + size_t num_filled = 0; + for(size_t offset = start_offset; offset < end_offset; offset++) + num_filled += __popcnt64(buffer[offset].mask); + + return std::make_pair(num_filled, sum); + } + + //sets the sum to the specified value + __forceinline void SetSum(size_t partial_sum_index, double value) + { + size_t bucket_offset = bucketStride * partial_sum_index; + buffer[bucket_offset].sum = value; + } + + //returns an iterator for partial_sum_index + __forceinline Iterator BeginPartialSumIndex(size_t partial_sum_index) + { + size_t offset = bucketStride * partial_sum_index + 1; + return Iterator(0, 0, &buffer[offset]); + } + + //returns true if the term of the sum at partial_sum_index and dimension_index has been accumulated yet, else false + __forceinline bool IsIndexComputed(size_t partial_sum_index, size_t dimension_index) + { + size_t bucket = GetBucketForIndex(dimension_index); + size_t mask = GetBucketBitForIndex(dimension_index); + size_t offset = bucketStride * partial_sum_index + bucket; + + return buffer[offset].mask & mask; + } + + /////////////////////// + //data storage + + //partial sum data + //stored interleaved as (sum, dimensionMask[numDimensions])[numInstances] + std::vector buffer; + + //number of dimensions + size_t numDimensions; + size_t numInstances; + + //a cached value computed based on numDimensions + // representing the length of each partial sum data block, excluding the sum + // making the stride length numBuckets + 1 + size_t numMaskBuckets; + + //equal to numMaskBuckets + 1, accounting for the sum + // cached purely for performance reasons + size_t bucketStride; +}; diff --git a/src/Amalgam/PerformanceProfiler.cpp b/src/Amalgam/PerformanceProfiler.cpp new file mode 100644 index 00000000..8825fa3e --- /dev/null +++ b/src/Amalgam/PerformanceProfiler.cpp @@ -0,0 +1,160 @@ +//project headers: +#include "PerformanceProfiler.h" + +PerformanceProfiler performance_profiler; + +void PerformanceProfiler::StartOperation(const std::string &t, int64_t memory_use) +{ + if(!profilingEnabled) + return; + + instructionStackTypeAndStartTimeAndMemUse.push_back(std::make_pair(t, 
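The PartialSumCollection above interleaves one running sum with a per-dimension bit mask for each instance, so asking how many feature terms have been accumulated is a popcount over the mask buckets rather than a scan. A small usage sketch, assuming the header builds with its FastMath and PlatformSpecific includes; the dimension counts and values are illustrative.

	#include "PartialSum.h"

	int main()
	{
		PartialSumCollection sums;
		sums.ResizeAndClear(/*num_dimensions*/ 3, /*num_instances*/ 2);

		//precompute the bucket/bit locations for the dimensions being accumulated
		auto loc0 = PartialSumCollection::GetAccumLocation(0);
		auto loc2 = PartialSumCollection::GetAccumLocation(2);

		//instance 0: dimension 0 contributes 1.5, dimension 2 contributes exactly zero
		sums.Accum(/*partial_sum_index*/ 0, loc0, 1.5);
		sums.AccumZero(0, loc2);

		//two of the three dimensions are filled in, and the sum is 1.5
		auto [num_filled, sum] = sums.GetNumFilledAndSum(0);
		return (num_filled == 2 && sum == 1.5) ? 0 : 1;
	}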
std::make_pair(GetCurTime(), memory_use))); +} + +void PerformanceProfiler::EndOperation(int64_t memory_use = 0) +{ + if(!profilingEnabled) + return; + + //get and remove data from call stack + auto type_and_time_and_mem = instructionStackTypeAndStartTimeAndMemUse.back(); + auto inst_type = type_and_time_and_mem.first; + double inst_start_time = type_and_time_and_mem.second.first; + int64_t inst_start_mem = type_and_time_and_mem.second.second; + instructionStackTypeAndStartTimeAndMemUse.pop_back(); + + double total_instruction_time = GetCurTime() - inst_start_time; + int64_t total_instruction_memory = memory_use - inst_start_mem; + + //accumulate stats + auto stat = numCallsByInstructionType.find(inst_type); + if(stat != end(numCallsByInstructionType)) + { + numCallsByInstructionType[inst_type]++; + timeSpentInInstructionType[inst_type] += total_instruction_time; + memoryAccumulatedInInstructionType[inst_type] += total_instruction_memory; + } + else + { + numCallsByInstructionType[inst_type] = 1; + timeSpentInInstructionType[inst_type] = total_instruction_time; + memoryAccumulatedInInstructionType[inst_type] = total_instruction_memory; + } + + //remove the time on this instruction for any that are currently pending on the stack by adding it to start time + for(auto &record : instructionStackTypeAndStartTimeAndMemUse) + { + record.second.first += total_instruction_time; + record.second.second += total_instruction_memory; + } +} + +size_t PerformanceProfiler::GetTotalNumCalls() +{ + size_t total_call_count = 0; + for(auto &c : numCallsByInstructionType) + total_call_count += c.second; + return total_call_count; +} + +std::pair PerformanceProfiler::GetTotalAndPositiveMemoryIncreases() +{ + int64_t total_mem_increase = 0; + int64_t positive_mem_increase = 0; + for(auto &c : memoryAccumulatedInInstructionType) + { + total_mem_increase += c.second; + if(c.second > 0) + positive_mem_increase += c.second; + } + return std::make_pair(total_mem_increase, positive_mem_increase); +} + +std::vector> PerformanceProfiler::GetNumCallsByType() +{ + //copy to proper data structure + std::vector> results; + results.reserve(numCallsByInstructionType.size()); + for(auto &[s, value] : numCallsByInstructionType) + results.push_back(std::make_pair(s, value)); + + //sort high to low + std::sort(begin(results), end(results), + [](std::pair a, std::pair b) -> bool + { return (a.second) > (b.second); }); + return results; +} + +std::vector> PerformanceProfiler::GetNumCallsByTotalTime() +{ + //copy to proper data structure + std::vector> results; + results.reserve(numCallsByInstructionType.size()); + for(auto &[s, value] : timeSpentInInstructionType) + results.push_back(std::make_pair(static_cast(s), value)); + + //sort high to low + std::sort(begin(results), end(results), + [](std::pair a, std::pair b) -> bool + { return (a.second) > (b.second); }); + return results; +} + +std::vector> PerformanceProfiler::GetNumCallsByAveTime() +{ + //copy to proper data structure + std::vector> results; + results.reserve(numCallsByInstructionType.size()); + for(auto &[s, value] : timeSpentInInstructionType) + { + auto ncbit = numCallsByInstructionType.find(s); + if(ncbit != end(numCallsByInstructionType)) + { + size_t num_calls = ncbit->second; + results.push_back(std::make_pair(static_cast(s), value / num_calls)); + } + } + + //sort high to low + std::sort(begin(results), end(results), + [](std::pair a, std::pair b) -> bool + { return (a.second) > (b.second); }); + return results; +} + +std::vector> 
PerformanceProfiler::GetNumCallsByTotalMemoryIncrease() +{ + //copy to proper data structure + std::vector> results; + results.reserve(memoryAccumulatedInInstructionType.size()); + for(auto &[s, value] : memoryAccumulatedInInstructionType) + results.push_back(std::make_pair(static_cast(s), static_cast(value))); + + //sort high to low + std::sort(begin(results), end(results), + [](std::pair a, std::pair b) -> bool + { return (a.second) > (b.second); }); + return results; +} + +std::vector> PerformanceProfiler::GetNumCallsByAveMemoryIncrease() +{ + //copy to proper data structure + std::vector> results; + results.reserve(memoryAccumulatedInInstructionType.size()); + for(auto &[s, value] : memoryAccumulatedInInstructionType) + { + auto ncbit = numCallsByInstructionType.find(s); + if(ncbit != end(numCallsByInstructionType)) + { + size_t num_calls = ncbit->second; + results.push_back(std::make_pair(static_cast(s), static_cast(value) / num_calls)); + } + } + + //sort high to low + std::sort(begin(results), end(results), + [](std::pair a, std::pair b) -> bool + { return (a.second) > (b.second); }); + return results; +} diff --git a/src/Amalgam/PerformanceProfiler.h b/src/Amalgam/PerformanceProfiler.h new file mode 100644 index 00000000..40588bf1 --- /dev/null +++ b/src/Amalgam/PerformanceProfiler.h @@ -0,0 +1,68 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "HashMaps.h" + +//system headers: +#include +#include + +//forward declarations: +class PerformanceProfiler; +extern PerformanceProfiler performance_profiler; + +class PerformanceProfiler +{ +public: + PerformanceProfiler() + { EnableProfiling(false); } + + //begins performance timers for the specified operation type, specified by the string t + // pushes current instruction on the stack, such that it will be cleared when the + // corresponding EndOperation is called + void StartOperation(const std::string &t, int64_t memory_use); + + void EndOperation(int64_t memory_use); + + size_t GetTotalNumCalls(); + + std::pair GetTotalAndPositiveMemoryIncreases(); + + std::vector> GetNumCallsByType(); + + std::vector> GetNumCallsByTotalTime(); + + std::vector> GetNumCallsByAveTime(); + + std::vector> GetNumCallsByTotalMemoryIncrease(); + + std::vector> GetNumCallsByAveMemoryIncrease(); + + void EnableProfiling(bool enable = true) + { profilingEnabled = enable; } + + bool IsProfilingEnabled() + { return profilingEnabled; } + + //gets the current time with nanosecond resolution cast to a double measured in seconds + static inline double GetCurTime() + { + typedef std::chrono::steady_clock clk; + auto cur_time = std::chrono::duration_cast(clk::now().time_since_epoch()).count(); + return cur_time / 1000.0 / 1000.0 / 1000.0; + } + +protected: + + //if true, then will record profiling data + bool profilingEnabled; + + //keeps track of number of instructions and time spent in them + FastHashMap numCallsByInstructionType; + FastHashMap timeSpentInInstructionType; + FastHashMap memoryAccumulatedInInstructionType; + + //contains the type and start time of each instruction + std::vector>> instructionStackTypeAndStartTimeAndMemUse; +}; \ No newline at end of file diff --git a/src/Amalgam/PlatformSpecific.cpp b/src/Amalgam/PlatformSpecific.cpp new file mode 100644 index 00000000..545e64ee --- /dev/null +++ b/src/Amalgam/PlatformSpecific.cpp @@ -0,0 +1,354 @@ +//project headers: +#include "PlatformSpecific.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +#ifdef OS_WINDOWS + + //disable 
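The profiler is driven as a stack: each StartOperation pushes an opcode name with its start time and memory use, EndOperation pops it and credits the elapsed time to that opcode while subtracting it from the operations still pending on the stack, and the GetNumCallsBy* accessors return the aggregated views sorted high to low. A minimal sketch using the global performance_profiler instance declared in the header; the operation names and memory figures are illustrative.

	#include "PerformanceProfiler.h"
	#include <iostream>

	int main()
	{
		performance_profiler.EnableProfiling(true);

		//nested operations: the inner operation's time is not double-counted in the outer one
		performance_profiler.StartOperation("outer_op", /*memory_use*/ 0);
		performance_profiler.StartOperation("inner_op", 0);
		//...work being measured would go here...
		performance_profiler.EndOperation(0); //closes inner_op
		performance_profiler.EndOperation(0); //closes outer_op

		//print total time per operation type, sorted from most to least
		for(auto &[name, seconds] : performance_profiler.GetNumCallsByTotalTime())
			std::cout << name << ": " << seconds << " s\n";

		return 0;
	}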
std::wstring_convert deprecation warning: no replacement in C++17 so + // will require rework. + #pragma warning(disable: 4996) + + #define NOMINMAX + #include + + class WindowsUtf8WStringConversion + { + public: + //convert UTF-8 string to wstring + inline std::wstring utf8_to_wstring(const std::string& str) + { + return conversion.from_bytes(str); + } + + //convert wstring to UTF-8 string + inline std::string wstring_to_utf8(const std::wstring& str) + { + return conversion.to_bytes(str); + } + + protected: + std::wstring_convert> conversion; + }; + +#else + #include + #include + #include + #include + #include + #include +#endif + +std::vector Platform_SplitArgString(const std::string &arg_string) +{ + std::vector args; + + size_t cur_pos = 0; + while(cur_pos < arg_string.size()) + { + //skip over any leading spaces + if(std::isspace(static_cast(arg_string[cur_pos]))) + { + cur_pos++; + continue; + } + + std::string cur_arg; + + //quotation, so go to the end of quotation + if(arg_string[cur_pos] == '"') + { + cur_pos++; + while(cur_pos < arg_string.size()) + { + if(arg_string[cur_pos] == '"') + { + cur_pos++; + break; + } + + cur_arg.push_back(arg_string[cur_pos++]); + } + } + else //not quotation, go until next whitespace + { + while(cur_pos < arg_string.size()) + { + if(std::isspace(static_cast(arg_string[cur_pos]))) + { + cur_pos++; + break; + } + + cur_arg.push_back(arg_string[cur_pos++]); + } + } + + args.push_back(cur_arg); + } + + return args; +} + +void Platform_SeparatePathFileExtension(const std::string &combined, std::string &path, std::string &base_filename, std::string &extension) +{ + if(combined.size() == 0) + return; + + //get path + path = combined; + size_t first_forward_slash = path.rfind('/'); + size_t first_backslash = path.rfind('\\'); + size_t first_slash; + + if(first_forward_slash == std::string::npos && first_backslash == std::string::npos) + first_slash = 0; + else if(first_forward_slash != std::string::npos && first_backslash == std::string::npos) + first_slash = first_forward_slash; + else if(first_forward_slash == std::string::npos && first_backslash != std::string::npos) + first_slash = first_backslash; + else //grab whichever one is closer to the end of the string + first_slash = std::max(first_forward_slash, first_backslash); + + if(first_slash == 0) + path = std::string("./"); + else + { + first_slash++; //keep the slash in the path + path = combined.substr(0, first_slash); + } + + //get extension + std::string filename = combined.substr(first_slash, combined.size() - first_slash); + size_t extension_position = filename.rfind('.'); + if(extension_position != std::string::npos) + { + base_filename = filename.substr(0, extension_position); + if(filename.size() > extension_position) + extension = filename.substr(extension_position + 1, filename.size() - (extension_position + 1)); //get rid of . + } + else + { + base_filename = filename; + extension = ""; + } +} + +void Platform_GetFileNamesOfType(std::vector &file_names, const std::string &path, const std::string &extension, bool get_directories) +{ +#ifdef OS_WINDOWS + std::string path_with_wildcard = path + "\\*." 
+ extension; + + WindowsUtf8WStringConversion conv; + auto wstring = conv.utf8_to_wstring(path_with_wildcard); + + //retreive file names + WIN32_FIND_DATA find_data; + HANDLE find = FindFirstFile(wstring.c_str(), &find_data); + while(find != INVALID_HANDLE_VALUE) + { + //if looking for directories and it's a directory, or not looking for directories and not a directory, count it + if((get_directories && (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) + || (!get_directories && !(find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))) + { + auto utf8_string = conv.wstring_to_utf8(find_data.cFileName); + file_names.push_back(utf8_string); + } + + if(!FindNextFile(find, &find_data)) + break; + } + + FindClose(find); + +#else + //see if using a wildcard for all extensions + bool check_extensions = true; + if(extension.size() >= 1 && extension[extension.size() - 1] == '*') + check_extensions = false; + + DIR *dh = opendir(path.c_str()); + if(dh == nullptr) + return; + + struct dirent *ent; + while((ent = readdir(dh))) + { + if(check_extensions) + { + char *cur_ext = strstr(ent->d_name, extension.c_str()); + if(cur_ext == nullptr + || strlen(ent->d_name) != ((intptr_t)cur_ext - (intptr_t)ent->d_name) + extension.size()) + continue; + } + + //make a string of the filename (including relative path) + std::string full_path = path + '/' + ent->d_name; + + //check if it is a directory + struct stat stat_buf; + stat(full_path.c_str(), &stat_buf); + bool is_dir = S_ISDIR(stat_buf.st_mode); + if(get_directories == is_dir) + file_names.push_back(std::string(ent->d_name)); + } + + closedir(dh); + +#endif +} + +std::string Platform_RunSystemCommand(std::string command, bool &successful_run, int &exit_code) +{ + FILE *p; +#ifdef OS_WINDOWS + p = _popen(command.c_str(), "r"); +#else + p = popen(command.c_str(), "r"); +#endif + + if(p == NULL) + { + exit_code = 0; + successful_run = false; + return ""; + } + + successful_run = true; + + std::string stdout_data; + + //not the fastest, but robust + char ch; + while((ch = fgetc(p)) != EOF) + stdout_data.push_back(ch); + +#ifdef OS_WINDOWS + exit_code = _pclose(p); +#else + exit_code = pclose(p); +#endif + + return stdout_data; +} + +std::string Platform_GetHomeDirectory() +{ +#if defined (WINDOWS) || defined (WIN32) || defined (_WIN32) + static char buff[MAX_PATH]; + const DWORD ret = GetEnvironmentVariableA("USERPROFILE", &buff[0], MAX_PATH); + if(ret == 0 || ret > MAX_PATH) + return ""; + else + return &buff[0]; +#else + return getenv("HOME"); +#endif +} + +bool Platform_IsResourcePathAccessible(const std::string &resource_path, bool must_exist, std::string &error) +{ + struct stat fileStatus; + errno = 0; + if(stat(resource_path.c_str(), &fileStatus) == -1) // == 0 ok; == -1 error + { + if(must_exist && errno == ENOENT) + { + error = "Resource path does not exist, or path is an empty string."; + return false; + } + else if(errno == ENOTDIR) + { + error = "A component of the path is not a directory."; + return false; + } + else if(errno == ELOOP) + { + error = "Too many symbolic links encountered while traversing the path."; + return false; + } + else if(errno == EACCES) + { + error = "Permission denied."; + return false; + } + else if(errno == ENAMETOOLONG) + { + error = "File cannot be read."; + return false; + } + } + + return true; +} + +void Platform_GenerateSecureRandomData(void *buffer, size_t length) +{ +#ifdef OS_WINDOWS + HCRYPTPROV hCryptProv; + CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT); + 
CryptGenRandom(hCryptProv, static_cast(length), static_cast(buffer)); + CryptReleaseContext(hCryptProv, 0); +#else + std::ifstream fp("/dev/random", std::ios::in | std::ios::binary); + if(fp.good()) + fp.read(static_cast(buffer), sizeof(uint8_t) * length); + fp.close(); +#endif +} + +void Platform_EnsurePreciseTiming() +{ + //need to link to extra .libs for this +#if defined(OS_WINDOWS) && defined(OS_WINDOWS_ACCURATE_SLEEP) + static bool time_resolution_initialized = false; + if(!time_resolution_initialized) + { + timeBeginPeriod(1); + time_resolution_initialized = true; + } +#endif +} + +//performs localtime in a threadsafe manner +bool Platform_ThreadsafeLocaltime(std::time_t time_value, std::tm **localized_time) +{ +#ifdef OS_WINDOWS + return localtime_s(*localized_time, &time_value) == 0; //MS swaps the values and returns the wrong thing +#else // POSIX + return ::localtime_r(&time_value, *localized_time) != nullptr; +#endif +} + +bool Platform_IsDebuggerPresent() +{ +#ifdef OS_WINDOWS + return (IsDebuggerPresent() ? true : false); +#endif + return false; +} + +std::string Platform_GetOperatingSystemName() +{ +#ifdef OS_WINDOWS + return "Windows"; +#endif + +#ifdef OS_LINUX + return "Linux"; +#endif + +#ifdef OS_MAC + return "Darwin"; +#endif + + return "Unknown"; +} diff --git a/src/Amalgam/PlatformSpecific.h b/src/Amalgam/PlatformSpecific.h new file mode 100644 index 00000000..92cad6d4 --- /dev/null +++ b/src/Amalgam/PlatformSpecific.h @@ -0,0 +1,266 @@ +#pragma once + +//system headers: +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//cross-platform main functions +#define PLATFORM_MAIN_CONSOLE int main(int argc, char* argv[]) + +#define PLATFORM_ARGS_CONSOLE auto args = Platform_ArgvToStringViews(argc, argv); + +#ifdef _WIN32 + #define OS_WINDOWS + + #define NOMINMAX + #include + + #define PLATFORM_MAIN_NO_CONSOLE int APIENTRY WinMain(HINSTANCE hCurrentInst, HINSTANCE hPreviousInst, LPSTR lpszCmdLine, int nCmdShow) + + #define PLATFORM_ARGS_NO_CONSOLE \ + std::string arg_string(lpszCmdLine); \ + auto args = Platform_SplitArgString(arg_string); + +#else + #ifdef __linux__ + #define OS_LINUX + #elif defined(__APPLE__) || defined(__MACH__) + #define OS_MAC + #endif + + #define PLATFORM_MAIN_NO_CONSOLE PLATFORM_MAIN_CONSOLE + + #define PLATFORM_ARGS_NO_CONSOLE PLATFORM_ARGS_CONSOLE + + //include signal to raise exception in linux + #include + +#endif + +//defines __popcnt64 if it doesn't exist +//platform independent intrinsic for bit count on a 64-bit var +#if defined(__GNUC__) + #define __popcnt64 __builtin_popcountll +#elif !defined(_MSC_VER) + size_t __popcnt64(uint64_t x) + { + size_t bit_count = 0; + while(x > 0) + { + if(x & 1) + bit_count++; + + x <<= 1; + } + return bit_count; + } +#endif + +//returns the offset of the first bit set in x, starting at 0 as the least significant bit +inline size_t Platform_FindFirstBitSet(uint64_t x) +{ +#if defined(__GNUC__) + return __builtin_ctzll(x); +#elif defined(_MSC_VER) + unsigned long bit; + _BitScanForward64(&bit, x); + return bit; +#else + size_t bit = 0; + while((x & (1ULL << bit)) == 0) + bit++; + return bit; +#endif +} + +//returns the offset of the last bit set in x, starting at 63 as the most significant bit +inline size_t Platform_FindLastBitSet(uint64_t x) +{ +#if defined(__GNUC__) + //counts the number of leading zeros, so need to find the difference between that + // and the number of digits to find the first 1 + //note that this is different behavior than the other two 
implementations below because of what is returned + return 63 - __builtin_clzll(x); +#elif defined(_MSC_VER) + unsigned long bit; + _BitScanReverse64(&bit, x); + return bit; +#else + size_t bit = 63; + while((x & (1ULL << bit)) == 0) + bit--; + return bit; +#endif +} + +//changes argv into string_view for easier use +inline std::vector Platform_ArgvToStringViews(int argc, char **argv) +{ + std::vector args; + args.reserve(argc); + for(int i = 0; i < argc; i++) + args.emplace_back(argv[i]); + return args; +} + +//attempts to open filename +//if successful, returns a string of data from the file and true +//if failure, returns an error message and false +inline std::pair Platform_OpenFileAsString(const std::string &filename) +{ + std::ifstream inf(filename, std::ios::in | std::ios::binary); + std::string data; + + if(!inf.good()) + { + data = "Error loading file: " + filename; + return std::make_pair(data, false); + } + + inf.seekg(0, std::ios::end); + size_t file_size = inf.tellg(); + if(file_size > 0) + { + data.resize(static_cast(file_size)); + inf.seekg(0, std::ios::beg); + inf.read(&data[0], data.size()); + } + inf.close(); + + return std::make_pair(data, true); +} + +//converts the string to a double, and returns true if it was successful, false if not +// note1: std::from_chars is supposed to be supported in all C++17 compliant compilers but +// is not. If upgrading to gcc-11 or beyond, this should be updated. AppleClang does +// not currently have a working implementation on any version. +// note2: std::from_chars is more desireable than std::strtod because it is locale independent +// TODO 15993: Reevaluate when moving to C++20 +inline std::pair Platform_StringToNumber(const std::string& s) +{ +#ifdef OS_WINDOWS + const char* first_char = s.c_str(); + const char* last_char = first_char + s.length(); + double value = 0.0; + auto [ptr, ec] = std::from_chars(first_char, last_char, value); + //if there was no parse error and nothing left on string, then it's a number + if(ec == std::errc() && ptr == last_char) + return std::make_pair(value, true); + return std::make_pair(0.0, false); +#else + const char* start_pointer = s.c_str(); + char* end_pointer = nullptr; + double value = strtod(start_pointer, &end_pointer); + //if didn't reach the end or grabbed nothing, then it's not a number + if(*end_pointer != '\0' || end_pointer == start_pointer) + return std::make_pair(0.0, false); + return std::make_pair(value, true); +#endif +} + +//separates the argument string in a cross-platform manner and returns an appropriate vector of strings +std::vector Platform_SplitArgString(const std::string &arg_string); + +//Takes a string containing a combined path/filename.extension, and breaks it into each of: path, base_filename, and extension +void Platform_SeparatePathFileExtension(const std::string &combined, std::string &path, std::string &base_filename, std::string &extension); + +//fills file_names with the respective files of the given path given the path and extension +// if get_directories is true, it will fetch directories +void Platform_GetFileNamesOfType(std::vector &file_names, const std::string &path, const std::string &extension, bool get_directories = false); + +//runs command returns everything sent to stdout +// any parameters should be included in the command +//successful_run is set to true if the program was able to be found and run +//exit_code is set to the exit code of the program +std::string Platform_RunSystemCommand(std::string command, bool &successful_run, int &exit_code); + 
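Most of the declarations above are small, self-contained helpers; two that are used outside this header are the path splitter and the command runner. A short sketch under the assumption that both behave as implemented in PlatformSpecific.cpp above; the file path and shell command are illustrative.

	#include "PlatformSpecific.h"
	#include <iostream>
	#include <string>

	int main()
	{
		//split a combined path into directory, base filename, and extension
		std::string path, base, ext;
		Platform_SeparatePathFileExtension("data/models/example.amlg", path, base, ext);
		std::cout << path << " | " << base << " | " << ext << "\n"; //"data/models/ | example | amlg"

		//run a shell command and capture its stdout and exit code
		bool ran = false;
		int exit_code = 0;
		std::string output = Platform_RunSystemCommand("echo hello", ran, exit_code);
		if(ran)
			std::cout << output << "exit code: " << exit_code << "\n";

		return 0;
	}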
+//returns a path to the home directory for the platform +std::string Platform_GetHomeDirectory(); + +//Returns true if resource is readable given whether must_exist is set. Returns false if not, and sets error string to the reason +bool Platform_IsResourcePathAccessible(const std::string &resource_path, bool must_exist, std::string &error); + +//generates cryptographically secure random data into buffer to specified length +void Platform_GenerateSecureRandomData(void *buffer, size_t length); + +//tells the OS that this process wants high-precision timing +void Platform_EnsurePreciseTiming(); + +//performs localtime in a threadsafe manner +// returns true on success +bool Platform_ThreadsafeLocaltime(std::time_t time_value, std::tm &localized_time); + +//returns true if a debugger is present +bool Platform_IsDebuggerPresent(); + +//returns a string representing the name of the operating system +std::string Platform_GetOperatingSystemName(); + +//platform dependent assertion function +#ifdef _DEBUG + +#ifdef OS_MAC +// warnings thrown on OS_MAC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmacro-redefined" +#endif + +#define assert(expr) Platform_Assert(expr, __LINE__) + +#ifdef OS_MAC +// warnings thrown on OS_MAC +#pragma GCC diagnostic pop +#endif + +inline void Platform_Assert(bool expr, size_t line) +{ + if(!expr) + { + std::cerr << "Runtime Exception: Debug Assertion Failed!\nLine: " << line << "\n"; +#ifdef OS_WINDOWS + _ASSERT(expr); +#else + raise(SIGTRAP); +#endif + exit(-1); + } +} + +#else + +#ifdef OS_MAC +// warnings thrown on OS_MAC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmacro-redefined" +#endif + +#define assert(expr) Platform_Assert(expr) + +#ifdef OS_MAC +// warnings thrown on OS_MAC +#pragma GCC diagnostic pop +#endif + +inline void Platform_Assert(bool expr) +{ + if(!expr) + { + std::cerr << "Runtime Exception: Debug Assertion Failed!\n"; + if(Platform_IsDebuggerPresent()) + { + //wait for user input + std::string temp; + std::getline(std::cin, temp); + } + exit(-1); + } +} + +#endif diff --git a/src/Amalgam/PrintListener.cpp b/src/Amalgam/PrintListener.cpp new file mode 100644 index 00000000..55cce260 --- /dev/null +++ b/src/Amalgam/PrintListener.cpp @@ -0,0 +1,42 @@ +//project headers: +#include "PrintListener.h" + +PrintListener::PrintListener(const std::string &filename, bool mirror_to_stdio) +{ + if(filename != "") + logFile.open(filename, std::ios::binary); + + mirrorToStdio = mirror_to_stdio; +} + +PrintListener::~PrintListener() +{ + if(logFile.is_open()) + logFile.close(); +} + +void PrintListener::LogPrint(std::string &print_string) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(mutex); +#endif + + if(logFile.is_open() && logFile.good()) + logFile << print_string; + + if(mirrorToStdio) + std::cout << print_string; +} + +void PrintListener::FlushLogFile() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(mutex); +#endif + + if(logFile.is_open() && logFile.good()) + logFile.flush(); + + if(mirrorToStdio) + std::cout.flush(); +} diff --git a/src/Amalgam/PrintListener.h b/src/Amalgam/PrintListener.h new file mode 100644 index 00000000..f9080dab --- /dev/null +++ b/src/Amalgam/PrintListener.h @@ -0,0 +1,31 @@ +#pragma once + +//project headers: +#include "Concurrency.h" + +//system headers: +#include +#include +#include + +class PrintListener +{ +public: + //stores all prints to file + PrintListener(const std::string &filename = std::string(), bool mirror_to_stdio = false); + + ~PrintListener(); + + 
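The PrintListener above simply tees print output to an optional log file and, when requested, to stdout, taking a write lock when multithreading support is compiled in. A minimal usage sketch; the log file name and message are illustrative, and LogPrint and FlushLogFile are the members declared just below.

	#include "PrintListener.h"
	#include <string>

	int main()
	{
		//log prints to a file and mirror them to stdout
		PrintListener listener("amalgam_print.log", /*mirror_to_stdio*/ true);

		//LogPrint takes a non-const reference, so the message must be an lvalue
		std::string message = "hello from the print listener\n";
		listener.LogPrint(message);
		listener.FlushLogFile();

		return 0;
	}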
void LogPrint(std::string &print_string); + + void FlushLogFile(); + +protected: + std::ofstream logFile; + bool mirrorToStdio = false; + +#ifdef MULTITHREAD_SUPPORT + //mutex for writing to make sure all streams are written in the same order + Concurrency::ReadWriteMutex mutex; +#endif +}; diff --git a/src/Amalgam/Resource.rc b/src/Amalgam/Resource.rc new file mode 100644 index 00000000..a1225156 Binary files /dev/null and b/src/Amalgam/Resource.rc differ diff --git a/src/Amalgam/SBFDSColumnData.h b/src/Amalgam/SBFDSColumnData.h new file mode 100644 index 00000000..b799b83d --- /dev/null +++ b/src/Amalgam/SBFDSColumnData.h @@ -0,0 +1,787 @@ +#pragma once + +//project headers: +#include "DistanceReferencePair.h" +#include "EvaluableNode.h" +#include "EvaluableNodeTreeFunctions.h" +#include "HashMaps.h" +#include "IntegerSet.h" + +//system headers: +#include +#include +#include + +//SBFDSColumnData class maintains a sorted linear and random access data collection +//values with the same key are placed into the same bucket. buckets are stored in sorted order by key +class SBFDSColumnData +{ +public: + //column needs to be named when it is created + inline SBFDSColumnData(StringInternPool::StringID sid) + : stringId(sid) + { + indexWithLongestString = 0; + longestStringLength = 0; + indexWithLargestCode = 0; + largestCodeSize = 0; + } + + //like InsertIndexValue, but used only for building the column data from an empty column + //this function must be called on each index in ascending order; for example, index 2 must be called after index 1 + //inserts number values in entities_with_number_values + //AppendSortedNumberIndicesWithSortedIndices should be called after all indices are inserted + void InsertNextIndexValueExceptNumbers(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value, + size_t index, std::vector> &entities_with_number_values) + { + if(value_type == ENIVT_NOT_EXIST) + { + invalidIndices.insert(index); + } + else if(value_type == ENIVT_NULL) + { + nullIndices.insert(index); + } + else if(value_type == ENIVT_NUMBER) + { + numberIndices.insert(index); + if(FastIsNaN(value.number)) + nanIndices.insert(index); + else + entities_with_number_values.emplace_back(value.number, index); + } + else if(value_type == ENIVT_STRING_ID) + { + stringIdIndices.insert(index); + + //try to insert the value if not already there, inserting an empty pointer + auto [id_entry, inserted] = stringIdValueToIndices.emplace(value.stringID, nullptr); + if(inserted) + id_entry->second = std::make_unique(); + + auto &ids = id_entry->second; + + ids->InsertNewLargestInteger(index); + + UpdateLongestString(value.stringID, index); + } + else if(value_type == ENIVT_CODE) + { + codeIndices.insert(index); + + //find the entities that have the correspending size; if the size doesn't exist, create it + size_t code_size = EvaluableNode::GetDeepSize(value.code); + + auto [size_entry, inserted] = valueCodeSizeToIndices.emplace(code_size, nullptr); + if(inserted) + size_entry->second = std::make_unique(); + + //add the entity + size_entry->second->insert(index); + + UpdateLargestCode(code_size, index); + } + } + + //inserts indices assuming that they have been sorted by value, + // and that index_values are also sorted from smallest to largest + void AppendSortedNumberIndicesWithSortedIndices(std::vector> &index_values) + { + if(index_values.size() == 0) + return; + + //count unique values so only need to perform one allocation for the main list + size_t num_uniques = 1; + double prev_value = 
index_values[0].distance; + for(size_t i = 1; i < index_values.size(); i++) + { + if(prev_value != index_values[i].distance) + { + num_uniques++; + prev_value = index_values[i].distance; + } + } + + sortedNumberValueIndexPairs.reserve(num_uniques); + numberIndices.ReserveNumIntegers(index_values.back().reference + 1); + + for(auto &index_value : index_values) + { + //if don't have the right bucket, then need to create one + if(sortedNumberValueIndexPairs.size() == 0 || sortedNumberValueIndexPairs.back().first != index_value.distance) + sortedNumberValueIndexPairs.emplace_back(index_value.distance, std::make_unique()); + + sortedNumberValueIndexPairs.back().second->InsertNewLargestInteger(index_value.reference); + numberIndices.insert(index_value.reference); + } + } + + //returns the value type of the given index given the value + __forceinline EvaluableNodeImmediateValueType GetIndexValueType(size_t index) + { + if(numberIndices.contains(index)) + return ENIVT_NUMBER; + if(stringIdIndices.contains(index)) + return ENIVT_STRING_ID; + if(nullIndices.contains(index)) + return ENIVT_NULL; + if(invalidIndices.contains(index)) + return ENIVT_NOT_EXIST; + return ENIVT_CODE; + } + + //moves index from being associated with key old_value to key new_value + void ChangeIndexValue(EvaluableNodeImmediateValue old_value, EvaluableNodeImmediateValueType new_value_type, EvaluableNodeImmediateValue new_value, size_t index) + { + //if new one is invalid, can quickly delete or return + if(new_value_type == ENIVT_NOT_EXIST) + { + if(!invalidIndices.contains(index)) + { + DeleteIndexValue(old_value, index); + invalidIndices.insert(index); + } + return; + } + + //delete index at old value + DeleteIndexValue(old_value, index); + + //add index at new value bucket + InsertIndexValue(new_value_type, new_value, index); + } + + //deletes everything involving the value at the index + void DeleteIndexValue(EvaluableNodeImmediateValue value, size_t index) + { + if(invalidIndices.EraseAndRetrieve(index)) + return; + + //if value is null, just need to remove from the appropriate index + if(nullIndices.EraseAndRetrieve(index)) + return; + + if(numberIndices.EraseAndRetrieve(index)) + { + //remove, and if not a nan, then need to also remove the number + if(!nanIndices.EraseAndRetrieve(index)) + { + //look up value + auto [value_index, exact_index_found] = FindExactIndexForValue(value.number); + if(!exact_index_found) + return; + + //if the bucket has only one entry, we must delete the entire bucket + if(sortedNumberValueIndexPairs[value_index].second->size() == 1) + { + sortedNumberValueIndexPairs.erase(sortedNumberValueIndexPairs.begin() + value_index); + } + else //else we can just remove the id from the bucket + { + sortedNumberValueIndexPairs[value_index].second->erase(index); + } + } + + return; + } + + if(stringIdIndices.EraseAndRetrieve(index)) + { + auto id_entry = stringIdValueToIndices.find(value.stringID); + if(id_entry != end(stringIdValueToIndices)) + { + auto &entities = *(id_entry->second); + entities.erase(index); + + //if no more entries have the value, remove it + if(entities.size() == 0) + stringIdValueToIndices.erase(id_entry); + } + + //see if need to compute new longest string + if(index == indexWithLongestString) + { + longestStringLength = 0; + //initialize to 0 in case there are no entities with strings + indexWithLongestString = 0; + for(auto &[s_id, s_entry] : stringIdValueToIndices) + UpdateLongestString(s_id, *s_entry->begin()); + } + + return; + } + + //if made it here, then just remove from a 
code value type + codeIndices.erase(index); + + //find the entities that have the correspending size + size_t num_indices = EvaluableNode::GetDeepSize(value.code); + auto id_entry = valueCodeSizeToIndices.find(num_indices); + if(id_entry == end(valueCodeSizeToIndices)) + return; + + //remove the entity + auto &entities = *(id_entry->second); + entities.erase(index); + + if(entities.size() == 0) + valueCodeSizeToIndices.erase(id_entry); + + //see if need to update largest code + if(index == indexWithLargestCode) + { + largestCodeSize = 0; + //initialize to 0 in case there are no entities with code + indexWithLargestCode = 0; + for(auto &[size, entry] : valueCodeSizeToIndices) + UpdateLargestCode(size, *entry->begin()); + } + } + + //inserts the value at id + void InsertIndexValue(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value, size_t index) + { + if(value_type == ENIVT_NOT_EXIST) + { + invalidIndices.insert(index); + return; + } + + if(value_type == ENIVT_NULL) + { + nullIndices.insert(index); + return; + } + + if(value_type == ENIVT_NUMBER) + { + numberIndices.insert(index); + + if(FastIsNaN(value.number)) + { + nanIndices.insert(index); + return; + } + + //if the value already exists, then put the index in the list + auto [value_index, exact_index_found] = FindExactIndexForValue(value.number); + if(exact_index_found) + { + sortedNumberValueIndexPairs[value_index].second->insert(index); + return; + } + + //insert new value in correct position + size_t new_value_index = FindUpperBoundIndexForValue(value.number); + auto inserted = sortedNumberValueIndexPairs.emplace(sortedNumberValueIndexPairs.begin() + new_value_index, value.number, std::make_unique()); + inserted->second->insert(index); + + return; + } + + if(value_type == ENIVT_STRING_ID) + { + stringIdIndices.insert(index); + + //try to insert the value if not already there + auto [inserted_id_entry, inserted] = stringIdValueToIndices.emplace(value.stringID, nullptr); + if(inserted) + inserted_id_entry->second = std::make_unique(); + + auto &ids = *(inserted_id_entry->second); + + ids.insert(index); + + UpdateLongestString(value.stringID, index); + return; + } + + //value_type == ENIVT_CODE + codeIndices.insert(index); + + //find the entities that have the correspending size; if the size doesn't exist, create it + size_t code_size = EvaluableNode::GetDeepSize(value.code); + + auto [size_entry, inserted] = valueCodeSizeToIndices.emplace(code_size, nullptr); + if(inserted) + size_entry->second = std::make_unique(); + + //add the entity + size_entry->second->insert(index); + + UpdateLargestCode(code_size, index); + } + + //returns the maximum difference between value and any other value for this column + //if empty, will return infinity + inline double GetMaxDifferenceTermFromValue(GeneralizedDistance::FeatureParams &feature_params, EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value) + { + switch(feature_params.featureType) + { + case FDT_NOMINAL: + return 1.0; + + case FDT_CONTINUOUS_NUMERIC: + case FDT_CONTINUOUS_UNIVERSALLY_NUMERIC: + if(sortedNumberValueIndexPairs.size() <= 1) + return 0.0; + + return sortedNumberValueIndexPairs.back().first - sortedNumberValueIndexPairs[0].first; + + case FDT_CONTINUOUS_NUMERIC_CYCLIC: + //maximum is the other side of the cycle + return feature_params.typeAttributes.maxCyclicDifference / 2; + + case FDT_CONTINUOUS_STRING: + //the max difference is the worst case edit distance, of removing all the characters + // and adding all the new ones + 
if(value_type == ENIVT_STRING_ID) + { + auto &s = string_intern_pool.GetStringFromID(value.stringID); + return static_cast(longestStringLength + StringManipulation::GetNumUTF8Characters(s)); + } + else if(value_type == ENIVT_NULL) + { + //if null, then could potentially have to remove a string, then add a new one, so counts as double + return static_cast(longestStringLength * 2); + } + else //not a string, so just count distance of adding the string plus one to remove the non-string value + { + return static_cast(longestStringLength + 1); + } + + case FDT_CONTINUOUS_CODE: + if(value_type == ENIVT_CODE) + return static_cast(largestCodeSize + EvaluableNode::GetDeepSize(value.code)); + else if(value_type == ENIVT_NULL) + //if null, then could potentially have to remove a the code, then add a all new, so counts as double + return static_cast(largestCodeSize * 2); + else //all other immediate types have a size of 1 + return static_cast(largestCodeSize + 1); + + default: + return std::numeric_limits::infinity(); + } + } + + //returns the exact index of value + //Same as std::binary_search but returns both index and if found + // .first: found index - if not found, returns closest index from lower_bound if + // return_index_lower_bound is set, -1 otherwise + // .second: true if exact index was found, false otherwise + inline std::pair FindExactIndexForValue(double value, bool return_index_lower_bound = false) + { + auto target_iter = std::lower_bound(begin(sortedNumberValueIndexPairs), end(sortedNumberValueIndexPairs), value, + [](const auto& value_index_pair, double value) + { + return value_index_pair.first < value; + }); + + if ((target_iter == end(sortedNumberValueIndexPairs)) || (target_iter->first != value)) // not exact match + { + return std::make_pair(return_index_lower_bound ? 
std::distance(begin(sortedNumberValueIndexPairs), target_iter) : -1 , false); + } + + return std::make_pair(std::distance(begin(sortedNumberValueIndexPairs), target_iter), true); // exact match + } + + //returns the index of the lower bound of value + inline size_t FindLowerBoundIndexForValue(double value) + { + auto target_iter = std::lower_bound(begin(sortedNumberValueIndexPairs), end(sortedNumberValueIndexPairs), value, + [](const auto &value_index_pair, double value) + { + return value_index_pair.first < value; + }); + return std::distance(begin(sortedNumberValueIndexPairs), target_iter); + } + + //returns the index of the upper bound of value + inline size_t FindUpperBoundIndexForValue(double value) + { + auto target_iter = std::upper_bound(begin(sortedNumberValueIndexPairs), end(sortedNumberValueIndexPairs), value, + [](double value, const auto &value_index_pair) + { + return value < value_index_pair.first; + }); + return std::distance(begin(sortedNumberValueIndexPairs), target_iter); + } + + //given a value, returns the index at which the value should be inserted into the sortedNumberValueIndexPairs + //returns true for .second when an exact match is found, false otherwise + //O(log(n)) + //cycle_length will take into account whether wrapping around is closer + inline std::pair FindClosestValueIndexForValue(double value, double cycle_length = std::numeric_limits::infinity()) + { + //first check if value exists + // returns the closest index (lower_bound) if an exact match is not found + auto [value_index, exact_index_found] = FindExactIndexForValue(value, true); + if(exact_index_found) + { + return std::make_pair(value_index, true); + } + + //if only have one element (or zero), short circuit code below + if(sortedNumberValueIndexPairs.size() <= 1) + return std::make_pair(0, false); + + size_t max_valid_index = sortedNumberValueIndexPairs.size() - 1; + size_t target_index = std::min(max_valid_index, value_index); //value_index is lower bound index since no exact match + + //if not cyclic or cyclic and not at the edge + if(cycle_length == std::numeric_limits::infinity() + || (target_index > 0 && target_index < max_valid_index) ) + { + //need to check index again in case not cyclic + // return index with the closer difference + if(target_index < max_valid_index + && (std::abs(sortedNumberValueIndexPairs[target_index + 1].first - value) < std::abs(sortedNumberValueIndexPairs[target_index].first - value))) + return std::make_pair(target_index + 1, false); + else + return std::make_pair(target_index, false); + } + else //cyclic + { + double dist_to_max_index = std::abs(sortedNumberValueIndexPairs[max_valid_index].first - value); + double dist_to_0_index = std::abs(sortedNumberValueIndexPairs[0].first - value); + size_t other_closest_index; + + if(target_index == 0) + { + //wrap around the top + dist_to_max_index = cycle_length - dist_to_max_index; + other_closest_index = 1; + } + else //target_index == max_valid_index + { + //wrap around bottom + dist_to_0_index = cycle_length - dist_to_0_index; + other_closest_index = max_valid_index - 1; + } + + double dist_to_other_closest_index = std::abs(sortedNumberValueIndexPairs[other_closest_index].first - value); + if(dist_to_0_index <= dist_to_other_closest_index && dist_to_0_index <= dist_to_max_index) + return std::make_pair(0, false); + else if(dist_to_other_closest_index <= dist_to_0_index) + return std::make_pair(other_closest_index, false); + else + return std::make_pair(max_valid_index, false); + } + } + + //given a feature_id and a 
range [low, high], inserts all the elements with values of feature feature_id within specified range into out; does not clear out + //Note about Null/NaNs: + //if the feature value is Nan/Null, it will NOT be present in the search results, ie "x" != 3 will NOT include elements with x is nan/Null, even though nan/null != 3 + void FindAllIndicesWithinRange(EvaluableNodeImmediateValueType value_type, + EvaluableNodeImmediateValue &low, EvaluableNodeImmediateValue &high, BitArrayIntegerSet &out, bool between_values = true) + { + if(value_type == ENIVT_NUMBER) + { + //there are no ids for this column, so return no results + if(sortedNumberValueIndexPairs.size() == 0) + return; + + //make a copy because passed by reference, and may need to change value for logic below + double low_number = low.number; + double high_number = high.number; + + if(FastIsNaN(low_number) || FastIsNaN(high_number)) + { + //both are NaN + if(FastIsNaN(low_number) && FastIsNaN(high_number)) + { + //if looking for NaN + if(between_values) + { + nanIndices.CopyTo(out); + } + else //looking for anything but NaN + { + numberIndices.CopyTo(out); + nanIndices.EraseTo(out); + } + + return; + } + + //if NaN specified and within range, then we want to include NaN indices + if(between_values) + nanIndices.CopyTo(out); + + //modify range to include elements from or up to -/+inf + if(FastIsNaN(low_number)) //find all NaN values and all values up to max + low_number = -std::numeric_limits::infinity(); //else include elements from -inf to high as well as NaN elements + else + high_number = std::numeric_limits::infinity(); //include elements from low to +inf as well as NaN elements + } + + //handle equality and nonequality case + if(low_number == high_number) + { + auto [value_index, exact_index_found] = FindExactIndexForValue(low_number); + if(!exact_index_found) + { + //if not found but looking for it, then just return + if(between_values) + return; + else //the value doesn't exist, include everything + { + //include nans + numberIndices.CopyTo(out); + } + } + + //if within range, and range has no length, just return indices in that one bucket + if(between_values) + { + size_t index = value_index; + out.InsertInBatch(*sortedNumberValueIndexPairs[index].second); + } + else //if not within, populate with all indices not equal to value + { + //include nans + nanIndices.CopyTo(out); + + for(auto &[bucket_val, bucket] : sortedNumberValueIndexPairs) + { + if(bucket_val == low_number) + continue; + + out.InsertInBatch(*bucket); + } + } + + return; + } + + size_t start_index = (low_number == -std::numeric_limits::infinity()) ? 0 : FindLowerBoundIndexForValue(low_number); + size_t end_index = (high_number == std::numeric_limits::infinity()) ? 
sortedNumberValueIndexPairs.size() : FindUpperBoundIndexForValue(high_number); + + if(between_values) + { + //insert everything between the two indices + for(size_t i = start_index; i < end_index; i++) + out.InsertInBatch(*sortedNumberValueIndexPairs[i].second); + + //include end_index if value matches + if(end_index < sortedNumberValueIndexPairs.size() && sortedNumberValueIndexPairs[end_index].first == high_number) + out.InsertInBatch(*sortedNumberValueIndexPairs[end_index].second); + } + else //not between_values + { + //insert everything left of range + for(size_t i = 0; i < start_index; i++) + out.InsertInBatch(*sortedNumberValueIndexPairs[i].second); + + //insert everything right of range + for(size_t i = end_index; i < sortedNumberValueIndexPairs.size(); i++) + out.InsertInBatch(*sortedNumberValueIndexPairs[i].second); + } + + } + else if(value_type == ENIVT_STRING_ID) + { + if(stringIdValueToIndices.size() == 0) + return; + + //check every string value to see if between + for(auto &[id, entry] : stringIdValueToIndices) + { + //check where the string is in the order; empty strings for comparison always pass + bool value_less_than_low = true; + if(low.stringID != string_intern_pool.NOT_A_STRING_ID && StringNaturalCompare(low.stringID, id) <= 0) + value_less_than_low = false; + + bool value_less_than_high = true; + if(high.stringID != string_intern_pool.NOT_A_STRING_ID && StringNaturalCompare(high.stringID, id) <= 0) + value_less_than_high = false; + + if(between_values) + { + if(value_less_than_low || !value_less_than_high) + continue; + } + else //not between_values + { + if(!value_less_than_low && value_less_than_high) + continue; + } + + //insert all entities with this value + for(auto index : *entry) + out.insert(index); + } + } + } + + //given a value, inserts into out all the entities that have the value + // does not handle ENIVT_CODE because it doesn't have the data + void UnionAllIndicesWithValue(EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value, BitArrayIntegerSet &out) + { + if(value_type == ENIVT_NOT_EXIST) + return; + + if(value_type == ENIVT_NULL) + { + //only want nulls that are not numbers + nullIndices.UnionTo(out); + } + else if(value_type == ENIVT_NUMBER) + { + if(FastIsNaN(value.number)) + { + //only want nans + nanIndices.UnionTo(out); + return; + } + + auto [value_index, exact_index_found] = FindExactIndexForValue(value.number); + if(exact_index_found) + out.InsertInBatch(*sortedNumberValueIndexPairs[value_index].second); + } + else if(value_type == ENIVT_STRING_ID) + { + auto id_entry = stringIdValueToIndices.find(value.stringID); + if(id_entry != end(stringIdValueToIndices)) + out.InsertInBatch(*(id_entry->second)); + } + } + + //fills out with the num_to_find min (if findMax == false) or max (find_max == true) entities in the database + //note, if indices_to_consider is not nullptr, will take the intersect, ie out will be set to the num_to_find min or max elements that exist in input indices_to_consider + void FindMinMax(EvaluableNodeImmediateValueType value_type, size_t num_to_find, bool find_max, + BitArrayIntegerSet *indices_to_consider, BitArrayIntegerSet &out) + { + if(value_type == ENIVT_NUMBER) + { + //there are no ids for this column, so return no results + if(sortedNumberValueIndexPairs.size() == 0) + return; + + //search left to right for max (bucket 0 is largest) or right to left for min + int64_t value_index = find_max ? 
sortedNumberValueIndexPairs.size() - 1 : 0; + + while(value_index < static_cast(sortedNumberValueIndexPairs.size()) && value_index >= 0) + { + //add each index to the out indices and optionally output compute results + for(const auto &index : *sortedNumberValueIndexPairs[value_index].second) + { + if(indices_to_consider != nullptr && !indices_to_consider->contains(index)) + continue; + + out.insert(index); + + //return once we have num_to_find entities + if(out.size() >= num_to_find) + return; + } + + value_index += find_max ? -1 : 1; //search right to right for max or left to right for min + } + } + else if(value_type == ENIVT_STRING_ID) + { + if(stringIdValueToIndices.size() == 0) + return; + + //else it's a string, need to do it the brute force way + std::vector all_sids; + all_sids.reserve(stringIdValueToIndices.size()); + + //get all strings + for(auto &[id, _] : stringIdValueToIndices) + all_sids.push_back(id); + + std::sort(begin(all_sids), end(all_sids), StringIDNaturalCompareSort); + + //search left to right for max (bucket 0 is largest) or right to left for min + int64_t value_index = find_max ? 0 : all_sids.size() - 1; + + while(value_index < static_cast(all_sids.size()) && value_index >= 0) + { + const auto &sid_entry = stringIdValueToIndices.find(all_sids[value_index]); + for(auto index : *(sid_entry->second)) + { + if(indices_to_consider != nullptr && !indices_to_consider->contains(index)) + continue; + + out.insert(index); + + //return once we have num_to_find entities + if(out.size() >= num_to_find) + return; + } + + value_index += find_max ? 1 : -1; //search left to right for max (bucket 0 is largest) or right to left for min + } + } + } + +protected: + + //updates longestStringLength and indexWithLongestString based on parameters + inline void UpdateLongestString(StringInternPool::StringID sid, size_t index) + { + auto &str = string_intern_pool.GetStringFromID(sid); + size_t str_size = StringManipulation::GetUTF8CharacterLength(str); + if(str_size > longestStringLength) + { + longestStringLength = str_size; + indexWithLongestString = index; + } + } + + //updates largestCodeSize and indexWithLargestCode based on parameters + inline void UpdateLargestCode(size_t code_size, size_t index) + { + if(code_size > largestCodeSize) + { + largestCodeSize = code_size; + indexWithLargestCode = index; + } + } + +public: + + //name of the column + StringInternPool::StringID stringId; + + //stores values in sorted order and the entities that have each value + std::vector< std::pair> > sortedNumberValueIndexPairs; + + //maps a string id to a vector of indices that have that string + CompactHashMap> stringIdValueToIndices; + + //for any value that doesn't fit into other values ( ENIVT_CODE ), maps the number of elements in the code + // to the indices of the same size + CompactHashMap> valueCodeSizeToIndices; + + //indices of entities with no value for this feature + EfficientIntegerSet invalidIndices; + + //indices of entities with a number value for this feature + EfficientIntegerSet numberIndices; + + //indices of entities with a string id value for this feature + EfficientIntegerSet stringIdIndices; + + //indices of entities with a null for this feature + EfficientIntegerSet nullIndices; + + //indices of entities with a NaN for this feature + // the entities will also be included in numberIndices + EfficientIntegerSet nanIndices; + + //indices that don't fall into the number/string/null types but are valid + EfficientIntegerSet codeIndices; + + //entity index with the longest string 
value for this label + size_t indexWithLongestString; + //the longest string length for this label + size_t longestStringLength; + + //entity index with the largest code size for this label + size_t indexWithLargestCode; + //the largest code size for this label + size_t largestCodeSize; +}; diff --git a/src/Amalgam/SeparableBoxFilterDataStore.cpp b/src/Amalgam/SeparableBoxFilterDataStore.cpp new file mode 100644 index 00000000..5083213f --- /dev/null +++ b/src/Amalgam/SeparableBoxFilterDataStore.cpp @@ -0,0 +1,1063 @@ +//project headers: +#include "SeparableBoxFilterDataStore.h" + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) +thread_local +#endif +SeparableBoxFilterDataStore::SBFDSParametersAndBuffers SeparableBoxFilterDataStore::parametersAndBuffers; + +double SeparableBoxFilterDataStore::PopulatePartialSumsWithSimilarFeatureValue(GeneralizedDistance &dist_params, + EvaluableNodeImmediateValue value, EvaluableNodeImmediateValueType value_type, + size_t num_entities_to_populate, bool expand_search_if_optimal, + size_t query_feature_index, size_t absolute_feature_index, BitArrayIntegerSet &enabled_indices) +{ + auto &column = columnData[absolute_feature_index]; + auto feature_type = dist_params.featureParams[query_feature_index].featureType; + + bool value_is_null = (value_type == ENIVT_NULL || (value_type == ENIVT_NUMBER && FastIsNaN(value.number))); + //need to accumulate values for nulls if the value is a null + if(value_is_null) + { + double unknown_unknown_term = dist_params.ComputeDistanceTermUnknownToUnknown(query_feature_index); + AccumulatePartialSums(column->nullIndices, query_feature_index, unknown_unknown_term); + AccumulatePartialSums(column->nanIndices, query_feature_index, unknown_unknown_term); + + //if nominal, need to compute null matches to keep the inner loops fast + // if a data set is mostly nulls, it'll be slower, but this is acceptable as a more rare situation + //if the known-unknown term is less than unknown_unknown (this should be rare if nulls have semantic meaning) + //then need to populate the rest of the cases + double known_unknown_term = dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + if(feature_type == FDT_NOMINAL || known_unknown_term < unknown_unknown_term) + { + BitArrayIntegerSet &known_unknown_indices = parametersAndBuffers.potentialMatchesSet; + known_unknown_indices = enabled_indices; + column->nullIndices.EraseTo(known_unknown_indices); + column->nanIndices.EraseTo(known_unknown_indices); + AccumulatePartialSums(known_unknown_indices, query_feature_index, known_unknown_term); + } + + return known_unknown_term; + } + + //need to accumulate nulls if they're closer than an exact match + //but if made it here, then the value itself isn't null + if(dist_params.IsKnownToUnknownDistanceLessThanOrEqualToExactMatch(query_feature_index)) + { + double known_unknown_term = dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + AccumulatePartialSums(column->nullIndices, query_feature_index, known_unknown_term); + AccumulatePartialSums(column->nanIndices, query_feature_index, known_unknown_term); + } + + //if nominal, only need to compute the exact match + if(feature_type == FDT_NOMINAL) + { + if(value_type == ENIVT_NUMBER) + { + auto [value_index, exact_index_found] = column->FindExactIndexForValue(value.number); + if(exact_index_found) + { + double term = dist_params.ComputeDistanceTermNominalExactMatch(query_feature_index); + 
AccumulatePartialSums(*column->sortedNumberValueIndexPairs[value_index].second, query_feature_index, term); + } + } + else if(value_type == ENIVT_STRING_ID) + { + auto value_found = column->stringIdValueToIndices.find(value.stringID); + if(value_found != end(column->stringIdValueToIndices)) + { + double term = dist_params.ComputeDistanceTermNominalExactMatch(query_feature_index); + AccumulatePartialSums(*(value_found->second), query_feature_index, term); + } + } + else if(value_type == ENIVT_CODE) + { + //compute partial sums for all code of matching size + size_t code_size = 1; + if(value_type == ENIVT_CODE) + code_size = EvaluableNode::GetDeepSize(value.code); + + auto value_found = column->valueCodeSizeToIndices.find(code_size); + if(value_found != end(column->valueCodeSizeToIndices)) + { + auto &entity_indices = *(value_found->second); + ComputeAndAccumulatePartialSums(dist_params, value, value_type, + entity_indices, query_feature_index, absolute_feature_index); + } + } + //else value_type == ENIVT_NULL + + //didn't find the value + return dist_params.ComputeDistanceTermNominalNonMatch(query_feature_index); + } + else if(feature_type == FDT_CONTINUOUS_STRING) + { + if(value_type == ENIVT_STRING_ID) + { + auto value_found = column->stringIdValueToIndices.find(value.stringID); + if(value_found != end(column->stringIdValueToIndices)) + { + double term = dist_params.ComputeDistanceTermNonNominalExactMatch(query_feature_index); + AccumulatePartialSums(*(value_found->second), query_feature_index, term); + } + } + + //the next closest string will have an edit distance of 1 + return dist_params.ComputeDistanceTermNonNominalNonCyclicNonNullRegular(1.0, query_feature_index); + } + else if(feature_type == FDT_CONTINUOUS_CODE) + { + //compute partial sums for all code of matching size + size_t code_size = 1; + if(value_type == ENIVT_CODE) + code_size = EvaluableNode::GetDeepSize(value.code); + + auto value_found = column->valueCodeSizeToIndices.find(code_size); + if(value_found != end(column->valueCodeSizeToIndices)) + { + auto &entity_indices = *(value_found->second); + ComputeAndAccumulatePartialSums(dist_params, value, value_type, + entity_indices, query_feature_index, absolute_feature_index); + } + + //next most similar code must be at least a distance of 1 edit away + return dist_params.ComputeDistanceTermNonNominalNonCyclicNonNullRegular(1.0, query_feature_index); + } + //else feature_type == FDT_CONTINUOUS_NUMERIC or FDT_CONTINUOUS_UNIVERSALLY_NUMERIC + + //if not a number or no numbers available, then no size + if(value_type != ENIVT_NUMBER || column->sortedNumberValueIndexPairs.size() == 0) + return GetMaxDistanceTermFromValue(dist_params, value, value_type, query_feature_index, absolute_feature_index); + + bool cyclic_feature = dist_params.IsFeatureCyclic(query_feature_index); + double cycle_length = std::numeric_limits::infinity(); + if(cyclic_feature) + cycle_length = dist_params.featureParams[query_feature_index].typeAttributes.maxCyclicDifference; + + auto [value_index, exact_index_found] = column->FindClosestValueIndexForValue(value.number, cycle_length); + + double term = 0.0; + if(exact_index_found) + term = dist_params.ComputeDistanceTermNonNominalExactMatch(query_feature_index); + else + term = dist_params.ComputeDistanceTermNonNominalNonNullRegular(value.number - column->sortedNumberValueIndexPairs[value_index].first, query_feature_index); + + size_t num_entities_computed = AccumulatePartialSums(*column->sortedNumberValueIndexPairs[value_index].second, query_feature_index, 
term); + + //the logic below assumes there are at least two entries + size_t num_unique_number_values = column->sortedNumberValueIndexPairs.size(); + if(num_unique_number_values <= 1) + return term; + + //if we haven't filled max_count results, or searched num_buckets, keep expanding search to neighboring buckets + size_t lower_value_index = value_index; + size_t upper_value_index = value_index; + double largest_term = term; + + //used for calculating the gaps between values + double last_diff = 0.0; + double largest_diff_delta = 0.0; + + //put a max limit to the number of cases + size_t max_cases_relative_to_total = std::min(static_cast(2000), static_cast(parametersAndBuffers.partialSums.numInstances / 8) ); + size_t max_num_to_find = std::max(num_entities_to_populate, max_cases_relative_to_total); + + //if one dimension or don't want to expand search, then cut off early + if(!expand_search_if_optimal) + max_num_to_find = num_entities_to_populate; + + //compute along the feature + while(num_entities_computed < max_num_to_find) + { + //see if can compute one bucket lower + bool compute_lower = false; + double lower_diff = 0.0; + size_t next_lower_index = 0; + if(!cyclic_feature) + { + if(lower_value_index > 0) + { + next_lower_index = lower_value_index - 1; + lower_diff = std::abs(value.number - column->sortedNumberValueIndexPairs[next_lower_index].first); + compute_lower = true; + } + } + else //cyclic_feature + { + size_t next_index; + if(lower_value_index > 0) + next_index = lower_value_index - 1; + else + next_index = num_unique_number_values - 1; + + //make sure didn't wrap all the way around for cyclic features + if(next_index != value_index) + { + next_lower_index = next_index; + lower_diff = GeneralizedDistance::ConstrainDifferenceToCyclicDifference(std::abs(value.number - column->sortedNumberValueIndexPairs[next_lower_index].first), cycle_length); + compute_lower = true; + } + } + + //see if can compute one bucket upper + bool compute_upper = false; + double upper_diff = 0.0; + size_t next_upper_index = 0; + if(!cyclic_feature) + { + if(upper_value_index + 1 < num_unique_number_values) + { + next_upper_index = upper_value_index + 1; + upper_diff = std::abs(value.number - column->sortedNumberValueIndexPairs[next_upper_index].first); + compute_upper = true; + } + } + else //cyclic_feature + { + size_t next_index; + if(upper_value_index + 1 < num_unique_number_values) + next_index = upper_value_index + 1; + else + next_index = 0; + + //make sure didn't wrap all the way around for cyclic features + //either from the value itself or overlapping with the next_lower_index + if(next_index != value_index) + { + if((!compute_lower || next_index != next_lower_index)) + { + next_upper_index = next_index; + upper_diff = GeneralizedDistance::ConstrainDifferenceToCyclicDifference(std::abs(value.number - column->sortedNumberValueIndexPairs[next_upper_index].first), cycle_length); + compute_upper = true; + } + else //upper and lower have overlapped, want to exit the loop + next_upper_index = next_lower_index; + } + } + + //determine the next closest point and its difference + double next_closest_diff; + size_t next_closest_index; + + //if can only compute lower or lower is closer, then compute lower + if( (compute_lower && !compute_upper) + || (compute_lower && compute_upper && lower_diff < upper_diff) ) + { + next_closest_diff = lower_diff; + next_closest_index = next_lower_index; + lower_value_index = next_lower_index; + } + else if(compute_upper) + { + next_closest_diff = upper_diff; + 
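+			//worked example of this two-sided expansion (values assumed for illustration): with sorted
+			// unique values {1, 3, 6, 10} and a query value of 5, the closest bucket is 6; the next
+			// candidates are 3 (lower_diff = 2) and 10 (upper_diff = 5), so the lower bucket (3) is
+			// taken first, then 1, then 10, accumulating one distance term per bucket, subject to the
+			// entity-count and diff-delta heuristics below that can stop the expansion early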
next_closest_index = next_upper_index; + upper_value_index = next_upper_index; + } + else //nothing left, end + { + break; + } + + //if running into the extra_iterations + if(num_entities_computed >= num_entities_to_populate) + { + //use heuristic to decide whether to continue populating based on whether this diff will help the overall distance cutoffs + // look at the rate of change of the difference compared to before, and how many new entities will be populated + // if it is too small and doesn't fill enough (or fills too many), then stop expanding + size_t potential_entities = column->sortedNumberValueIndexPairs[next_closest_index].second->size(); + if(num_entities_computed + potential_entities > max_num_to_find) + break; + + //determine if it should continue based on how much this difference will contribute to the total; either a big jump or enough entities + bool should_continue = false; + double diff_delta = next_closest_diff - last_diff; + + if(diff_delta >= largest_diff_delta) + should_continue = true; + + if(diff_delta >= largest_diff_delta / 2 && potential_entities >= 2) + should_continue = true; + + //going out n deviations is likely to only miss 0.5^n of the likely values of nearest neighbors + // so 0.5^5 should catch ~97% of the values + if(dist_params.DoesFeatureHaveDeviation(query_feature_index) + && next_closest_diff < 5 * dist_params.featureParams[query_feature_index].deviation) + should_continue = true; + + if(!should_continue) + break; + } + + term = dist_params.ComputeDistanceTermNonNominalNonNullRegular(next_closest_diff, query_feature_index); + num_entities_computed += AccumulatePartialSums(*column->sortedNumberValueIndexPairs[next_closest_index].second, query_feature_index, term); + + //track the rate of change of difference + if(next_closest_diff - last_diff > largest_diff_delta) + largest_diff_delta = next_closest_diff - last_diff; + last_diff = next_closest_diff; + + //keep track of the largest seen so far + if(term > largest_term) + largest_term = term; + + //if cyclic and have wrapped around, then exit + if(lower_value_index >= upper_value_index) + break; + } + + //return the largest computed so far + return largest_term; +} + +void SeparableBoxFilterDataStore::PopulateInitialPartialSums(GeneralizedDistance &dist_params, size_t top_k, size_t num_enabled_features, + BitArrayIntegerSet &enabled_indices, std::vector &min_unpopulated_distances, std::vector &min_distance_by_unpopulated_count) +{ + size_t num_entities_to_populate = top_k; + //populate sqrt(2)^p * top_k, which will yield 2 for p=2, 1 for p=0, and about 1.2 for p=0.5 + if(num_enabled_features > 1) + num_entities_to_populate = static_cast(std::lround(FastPow(GeneralizedDistance::s_sqrt_2, dist_params.pValue) * top_k)) + 1; + + min_unpopulated_distances.resize(num_enabled_features); + for(size_t i = 0; i < num_enabled_features; i++) + { + double next_closest_distance = PopulatePartialSumsWithSimilarFeatureValue(dist_params, + parametersAndBuffers.targetValues[i], parametersAndBuffers.targetValueTypes[i], + num_entities_to_populate, + //expand search if using more than one dimension + num_enabled_features > 1 , + i, parametersAndBuffers.targetColumnIndices[i], enabled_indices); + + min_unpopulated_distances[i] = next_closest_distance; + } + std::sort(begin(min_unpopulated_distances), end(min_unpopulated_distances)); + + //compute min distance based on the number of features that are unpopulated + min_distance_by_unpopulated_count.clear(); + //need to add a 0 for when all distances are computed + 
min_distance_by_unpopulated_count.push_back(0.0); + //append all of the sorted distances so they can be accumulated and assigned + min_distance_by_unpopulated_count.insert(end(min_distance_by_unpopulated_count), begin(min_unpopulated_distances), end(min_unpopulated_distances)); + for(size_t i = 1; i < min_distance_by_unpopulated_count.size(); i++) + min_distance_by_unpopulated_count[i] += min_distance_by_unpopulated_count[i - 1]; +} + +void SeparableBoxFilterDataStore::PopulatePotentialGoodMatches(FlexiblePriorityQueue> &potential_good_matches, + BitArrayIntegerSet &enabled_indices, PartialSumCollection &partial_sums, size_t top_k) +{ + potential_good_matches.clear(); + potential_good_matches.Reserve(top_k); + + //first, build up top_k that have at least one feature + size_t entity_index = 0; + size_t indices_considered = 0; + size_t end_index = enabled_indices.GetEndInteger(); + for(; entity_index < end_index; entity_index++) + { + //don't need to check maximum index, because already checked in loop + if(!enabled_indices.ContainsWithoutMaximumIndexCheck(entity_index)) + continue; + + indices_considered++; + + auto [num_calculated_feature_deltas, cur_sum] = partial_sums.GetNumFilledAndSum(entity_index); + if(num_calculated_feature_deltas == 0) + continue; + + potential_good_matches.emplace(num_calculated_feature_deltas, cur_sum, entity_index); + if(potential_good_matches.size() == top_k) + { + entity_index++; + break; + } + } + + //heuristically attempt to find some cases with the most number of features calculated (by the closest matches) and the lowest distances + //iterate until at least index_end / e cases are seen, but cap at a maximum number + size_t total_indices = enabled_indices.size(); + size_t num_indices_to_consider = static_cast(std::floor(total_indices * 0.3678794411714)); + num_indices_to_consider = std::min(static_cast(1000), num_indices_to_consider); + + //find a good number of features based on the discrete logarithm of the number of features + size_t good_number_of_features = 0; + size_t num_features = partial_sums.numDimensions; + while(num_features >>= 1) + good_number_of_features++; + + //start with requiring at least one feature matching to be considered a good match + size_t good_match_threshold_count = 1; + double good_match_threshold_value = std::numeric_limits::infinity(); + if(potential_good_matches.size() > 0) + { + const auto &top = potential_good_matches.top(); + good_match_threshold_count = top.count; + good_match_threshold_value = top.distance; + } + + //continue on starting at the next unexamined index until have seen at least max_considerable_good_index + // or k filled with entities having good_number_of_features calculated + for(; indices_considered < num_indices_to_consider && entity_index < end_index; entity_index++) + { + //don't need to check maximum index, because already checked in loop + if(!enabled_indices.ContainsWithoutMaximumIndexCheck(entity_index)) + continue; + + indices_considered++; + + auto [num_calculated_feature_deltas, cur_sum] = partial_sums.GetNumFilledAndSum(entity_index); + //skip if not good enough + if(num_calculated_feature_deltas < good_match_threshold_count) + continue; + + //either needs to exceed the calculated features or have smaller distance + if(num_calculated_feature_deltas > good_match_threshold_count + || cur_sum < good_match_threshold_value) + { + //have top_k, but this one is better + potential_good_matches.emplace(num_calculated_feature_deltas, cur_sum, entity_index); + potential_good_matches.pop(); + + const 
auto &top = potential_good_matches.top(); + good_match_threshold_count = top.count; + good_match_threshold_value = top.distance; + + //if have found enough features, stop searching + if(good_match_threshold_count >= good_number_of_features) + break; + } + } +} + +size_t SeparableBoxFilterDataStore::AddLabelsAsEmptyColumns(std::vector &label_ids, size_t num_entities) +{ + size_t num_existing_columns = columnData.size(); + size_t num_inserted_columns = 0; + + //create columns for the labels, don't count any that already exist + for(auto label_id : label_ids) + { + auto [_, inserted] = labelIdToColumnIndex.insert(std::make_pair(label_id, columnData.size())); + if(inserted) + { + columnData.emplace_back(std::make_unique(label_id)); + num_inserted_columns++; + } + } + + //if nothing has been populated, then just create an empty matrix + if(matrix.size() == 0) + { + numEntities = num_entities; + matrix.resize(columnData.size() * numEntities); + return num_inserted_columns; + } + + //expand the matrix to add the empty columns + std::vector old_matrix; + std::swap(old_matrix, matrix); //swap data pointers to free old memory + matrix.resize(columnData.size() * numEntities); + + size_t num_columns_new = columnData.size(); + + //copy over existing data in blocks per entity + for(size_t i = 0; i < num_entities; i++) + memcpy((char *)&matrix[i * num_columns_new], (char *)&old_matrix[i * num_existing_columns], sizeof(EvaluableNodeImmediateValue) * num_existing_columns); + + //update the number of entities + numEntities = num_entities; + + return num_inserted_columns; +} + +void SeparableBoxFilterDataStore::RemoveColumnIndex(size_t column_index_to_remove) +{ + //will replace the values at index_to_remove with the values at index_to_move + size_t column_index_to_move = columnData.size() - 1; + + size_t label_id = columnData[column_index_to_remove]->stringId; + + size_t num_columns = columnData.size(); + + //move data from the last column to the removed column if removing the label_id isn't the last column + if(column_index_to_remove != column_index_to_move) + { + for(size_t i = 0; i < numEntities; i++) + matrix[i * num_columns + column_index_to_remove] = matrix[i * num_columns + column_index_to_move]; + + //update column lookup + size_t label_id_to_move = columnData[column_index_to_move]->stringId; + labelIdToColumnIndex[label_id_to_move] = column_index_to_remove; + + //rearrange columns + std::swap(columnData[column_index_to_remove], columnData[column_index_to_move]); + } + + //remove the columnId lookup, reference, and column + labelIdToColumnIndex.erase(label_id); + columnData.pop_back(); + + //create new smaller container to hold the reduced data + std::vector old_matrix; + std::swap(old_matrix, matrix); + + //if no columns left, then done + if(columnData.size() == 0) + return; + + //move data over to new reduced copy of matrix + matrix.resize(columnData.size() * numEntities); + for(size_t i = 0; i < numEntities; i++) + memcpy((char *)&matrix[i * columnData.size()], (char *)&old_matrix[i * (columnData.size() + 1)], sizeof(EvaluableNodeImmediateValue) * (columnData.size())); +} + +void SeparableBoxFilterDataStore::DeleteEntityIndexFromColumns(size_t index) +{ + for(size_t i = 0; i < columnData.size(); i++) + { + auto &feature_value = GetValue(index, i); + columnData[i]->DeleteIndexValue(feature_value, index); + } +} + +//populates distances_out with all entities and their distances that have a distance to target less than max_dist +// and sets distances_out to the found entities. 
Infinity is allowed to compute all distances. +//if enabled_indices is not nullptr, it will only find distances to those entities, and it will modify enabled_indices in-place +// removing entities that do not have the corresponding labels +void SeparableBoxFilterDataStore::FindEntitiesWithinDistance(GeneralizedDistance &dist_params, std::vector &position_label_ids, + std::vector &position_values, std::vector &position_value_types, double max_dist, + BitArrayIntegerSet &enabled_indices, std::vector> &distances_out) +{ + if(GetNumInsertedEntities() == 0) + return; + + //look up these data structures upfront for performance + auto &target_column_indices = parametersAndBuffers.targetColumnIndices; + auto &target_values = parametersAndBuffers.targetValues; + auto &target_value_types = parametersAndBuffers.targetValueTypes; + PopulateTargetValuesAndLabelIndices(dist_params, position_label_ids, position_values, position_value_types); + if(target_values.size() == 0) + return; + + PopulateUnknownFeatureValueTerms(dist_params); + + //Starting with all entities, narrow down the list by incrementally summing up the minkowski distances + const double max_dist_exponentiated = std::pow(max_dist, dist_params.pValue); //max_dist ^ p >= MinkowskiDistanceSum + + //initialize all distances to 0 + auto &distances = parametersAndBuffers.entityDistances; + distances.clear(); + distances.resize(GetNumInsertedEntities(), 0.0); + + //remove any entities that are missing labels + for(auto absolute_feature_index : target_column_indices) + columnData[absolute_feature_index]->invalidIndices.EraseInBatchFrom(enabled_indices); + enabled_indices.UpdateNumElements(); + + //for each desired feature, compute and add distance terms of possible window query candidate entities + for(size_t query_feature_index = 0; query_feature_index < target_column_indices.size(); query_feature_index++) + { + size_t absolute_feature_index = target_column_indices[query_feature_index]; + auto target_value = target_values[query_feature_index]; + auto target_value_type = target_value_types[query_feature_index]; + + auto &column_data = columnData[absolute_feature_index]; + + if(target_value_type == ENIVT_NULL || (target_value_type == ENIVT_NUMBER && FastIsNaN(target_value.number)) ) + { + //add the appropriate unknown distance to each element + double unknown_unknown_term = dist_params.ComputeDistanceTermUnknownToUnknown(query_feature_index); + double known_unknown_term = dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + + auto &null_indices = column_data->nullIndices; + auto &nan_indices = column_data->nanIndices; + for(auto entity_index : enabled_indices) + { + if(null_indices.contains(entity_index) || nan_indices.contains(entity_index)) + distances[entity_index] += unknown_unknown_term; + else + distances[entity_index] += known_unknown_term; + + //remove entity if its distance is already greater than the max_dist + if(!(distances[entity_index] <= max_dist_exponentiated)) //false for NaN indices as well so they will be removed + enabled_indices.erase(entity_index); + } + + continue; + } + + if(target_value_type == ENIVT_NUMBER) + { + //below we branch to optimize the number of distance terms that need to be computed to solve minimum distance problem + //if there are fewer enabled_indices than the number of unique values for this feature, plus one for unknown values + // it is usually faster (less distances to compute) to just compute distance for each unique value and add to associated sums + // unless it happens to be that 
enabled_indices is very skewed + if(column_data->sortedNumberValueIndexPairs.size() < enabled_indices.size()) + { + for(auto &[entity_list_value, entity_list] : column_data->sortedNumberValueIndexPairs) + { + //get distance term that is applicable to each entity in this bucket + double distance_term = dist_params.ComputeDistanceTermRegularOneNonNull(target_value.number - entity_list_value, query_feature_index); + + //for each bucket, add term to their sums + for(auto entity_index : *entity_list) + { + if(!enabled_indices.contains(entity_index)) + continue; + + distances[entity_index] += distance_term; + + //remove entity if its distance is already greater than the max_dist, won't ever become NaN here (would already have been removed from indices) + if(!(distances[entity_index] <= max_dist_exponentiated)) //false for NaN indices as well so they will be removed + enabled_indices.erase(entity_index); + } + } + + //populate all non-number distances + double unknown_dist = dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + for(auto entity_index : enabled_indices) + { + //skip over number values + if(column_data->numberIndices.contains(entity_index)) + continue; + + distances[entity_index] += unknown_dist; + + //remove entity if its distance is already greater than the max_dist + if(!(distances[entity_index] <= max_dist_exponentiated)) //false for NaN indices as well so they will be removed + enabled_indices.erase(entity_index); + } + + continue; + } + } + + //if target_value_type == ENIVT_CODE or ENIVT_STRING_ID, just compute all + // won't save much for code until cache equal values + // won't save much for string ids because it's just a lookup (though could make it a little faster by streamlining a specialized string loop) + + //else, there are less indices to consider than possible unique values, so save computation by just considering entities that are still valid + for(auto entity_index : enabled_indices) + { + auto &value = GetValue(entity_index, absolute_feature_index); + auto value_type = column_data->GetIndexValueType(entity_index); + + distances[entity_index] += dist_params.ComputeDistanceTermRegular(target_value, value, target_value_type, value_type, query_feature_index); + + //remove entity if its distance is already greater than the max_dist + if(!(distances[entity_index] <= max_dist_exponentiated)) //false for NaN indices as well so they will be removed + enabled_indices.erase(entity_index); + } + } + + //populate distances_out vector + distances_out.reserve(enabled_indices.size()); + bool need_recompute_distances = (dist_params.recomputeAccurateDistances && !dist_params.highAccuracy); + if(!need_recompute_distances) + { + for(auto index : enabled_indices) + distances_out.emplace_back(dist_params.InverseExponentiateDistance(distances[index]), index); + } + else + { + dist_params.SetHighAccuracy(true); + for(auto index : enabled_indices) + distances_out.emplace_back(GetDistanceBetween(dist_params, target_values, target_value_types, target_column_indices, index), index); + } +} + +void SeparableBoxFilterDataStore::FindEntitiesNearestToIndexedEntity(GeneralizedDistance *dist_params_ref, std::vector &position_label_ids, + bool constant_dist_params, size_t search_index, size_t top_k, BitArrayIntegerSet &enabled_indices, + bool expand_to_first_nonzero_distance, std::vector> &distances_out, size_t ignore_index, RandomStream rand_stream) +{ + if(top_k == 0 || GetNumInsertedEntities() == 0) + return; + + GeneralizedDistance *dist_params = dist_params_ref; + 
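+	//if the caller's distance parameters must be treated as constant, work on a copy held in
+	// parametersAndBuffers, which is thread_local when multithreading is enabled, so each thread
+	// gets its own scratch copy; this matters because the search below may mutate the parameters,
+	// for example via SetHighAccuracy when recomputing accurate distances for the final results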
if(constant_dist_params) + { + dist_params = ¶metersAndBuffers.distParams; + *dist_params = *dist_params_ref; + } + + //build target + auto &target_column_indices = parametersAndBuffers.targetColumnIndices; + target_column_indices.clear(); + + auto &target_values = parametersAndBuffers.targetValues; + target_values.clear(); + + auto &target_value_types = parametersAndBuffers.targetValueTypes; + target_value_types.clear(); + + const size_t matrix_index_base = search_index * columnData.size(); + for(size_t i = 0; i < position_label_ids.size(); i++) + { + auto found = labelIdToColumnIndex.find(position_label_ids[i]); + if(found == end(labelIdToColumnIndex)) + continue; + + if(dist_params->IsFeatureEnabled(i)) + { + size_t column_index = found->second; + + auto &value = matrix[matrix_index_base + column_index]; + auto value_type = columnData[column_index]->GetIndexValueType(search_index); + + PopulateNextTargetAttributes(*dist_params, + target_column_indices, target_values, target_value_types, + column_index, value, value_type, + dist_params->featureParams[i].featureType); + } + } + + PopulateUnknownFeatureValueTerms(*dist_params); + + size_t num_enabled_features = target_values.size(); + + //make a copy of the entities so that the list can be modified + BitArrayIntegerSet &possible_knn_indices = parametersAndBuffers.potentialMatchesSet; + possible_knn_indices = enabled_indices; + + //remove search_index and ignore_index + possible_knn_indices.erase(search_index); + possible_knn_indices.erase(ignore_index); + + //remove invalid cases + for(size_t absolute_feature_index : target_column_indices) + columnData[absolute_feature_index]->invalidIndices.EraseInBatchFrom(possible_knn_indices); + possible_knn_indices.UpdateNumElements(); + + //if num enabled indices < top_k, return sorted distances + if(GetNumInsertedEntities() <= top_k || possible_knn_indices.size() <= top_k) + return FindAllValidElementDistances(*dist_params, target_column_indices, target_values, target_value_types, possible_knn_indices, distances_out, rand_stream); + + size_t end_index = possible_knn_indices.GetEndInteger(); + + //reuse the appropriate partial_sums_buffer buffer + auto &partial_sums = parametersAndBuffers.partialSums; + partial_sums.ResizeAndClear(num_enabled_features, end_index); + + //calculate the partial sums for the cases that best match for each feature + // and populate the vectors of smallest possible distances that haven't been computed yet + auto &min_unpopulated_distances = parametersAndBuffers.minUnpopulatedDistances; + auto &min_distance_by_unpopulated_count = parametersAndBuffers.minDistanceByUnpopulatedCount; + PopulateInitialPartialSums(*dist_params, top_k, num_enabled_features, possible_knn_indices, min_unpopulated_distances, min_distance_by_unpopulated_count); + + auto &potential_good_matches = parametersAndBuffers.potentialGoodMatches; + PopulatePotentialGoodMatches(potential_good_matches, possible_knn_indices, partial_sums, top_k); + + //reuse, clear, and set up sorted_results + auto &sorted_results = parametersAndBuffers.sortedResults; + sorted_results.clear(); + sorted_results.SetStream(rand_stream); + sorted_results.Reserve(top_k); + + //parse the sparse inline hash of good match nodes directly into the compacted vector of good matches + while(potential_good_matches.size() > 0) + { + size_t entity_index = potential_good_matches.top().reference; + + //insert random selection into results heap + double distance = ResolveDistanceToNonMatchTargetValues(*dist_params, + target_column_indices, 
target_values, target_value_types, partial_sums, entity_index, num_enabled_features); + sorted_results.Push(DistanceReferencePair(distance, entity_index)); + + //skip this entity in the next loops + possible_knn_indices.erase(entity_index); + + potential_good_matches.pop(); + } + + //if we did not find K results (search failed), we must populate the remaining K cases/results to search from another way + //we will randomly select additional nodes to fill K results. random to prevent bias/patterns + while(sorted_results.Size() < top_k && possible_knn_indices.size() > 0) + { + //get a random index that is still potentially in the knn (neither rejected nor already in the results) + size_t random_index = possible_knn_indices.GetRandomElement(rand_stream); + + double distance = ResolveDistanceToNonMatchTargetValues(*dist_params, + target_column_indices, target_values, target_value_types, partial_sums, random_index, num_enabled_features); + sorted_results.Push(DistanceReferencePair(distance, random_index)); + + //skip this entity in the next loops + possible_knn_indices.erase(random_index); + } + + //cache kth smallest distance to target search node + double worst_candidate_distance = std::numeric_limits::infinity(); + if(sorted_results.Size() == top_k) + { + double top_distance = sorted_results.Top().distance; + //don't clamp top distance if we're expanding and only have 0 distances + if(! (expand_to_first_nonzero_distance && top_distance == 0.0) ) + worst_candidate_distance = top_distance; + } + + //execute window query, with dynamically shrinking bounds + for(const size_t entity_index : possible_knn_indices) + { + //if still accepting new candidates because found only zero distances + if(worst_candidate_distance == std::numeric_limits::infinity()) + { + double distance = ResolveDistanceToNonMatchTargetValues(*dist_params, + target_column_indices, target_values, target_value_types, partial_sums, entity_index, num_enabled_features); + sorted_results.Push(DistanceReferencePair(distance, entity_index)); + + //if full, update worst_candidate_distance + if(sorted_results.Size() >= top_k) + { + double top_distance = sorted_results.Top().distance; + //don't clamp top distance if we're expanding and only have 0 distances + if(!(expand_to_first_nonzero_distance && top_distance == 0.0)) + worst_candidate_distance = top_distance; + } + + continue; + } + + //already have enough elements, but see if this one is good enough + auto [accept, distance] = ResolveDistanceToNonMatchTargetValues(*dist_params, + target_column_indices, target_values, target_value_types, partial_sums, + entity_index, min_distance_by_unpopulated_count, num_enabled_features, worst_candidate_distance, min_unpopulated_distances); + + if(!accept) + continue; + + //if not expanding and pushing a zero distance onto the stack, then push and pop a value onto the stack + if(!(expand_to_first_nonzero_distance && distance == 0.0)) + worst_candidate_distance = sorted_results.PushAndPop(DistanceReferencePair(distance, entity_index)).distance; + else //adding a zero and need to expand beyond zeros + { + //add the zero + sorted_results.Push(DistanceReferencePair(distance, entity_index)); + + //make copy of the top and pop it + DistanceReferencePair drp = sorted_results.Top(); + sorted_results.Pop(); + + //if the next largest size is zero, then need to put the non-zero value back in sorted_results + if(sorted_results.Size() > 0 && sorted_results.Top().distance == 0) + sorted_results.Push(drp); + } + } + + //return k nearest -- don't need to clear 
because the values will be clobbered + distances_out.resize(sorted_results.Size()); + bool need_recompute_distances = (dist_params->recomputeAccurateDistances && !dist_params->highAccuracy); + if(need_recompute_distances) + dist_params->SetHighAccuracy(true); + + while(sorted_results.Size() > 0) + { + auto &drp = sorted_results.Top(); + double distance; + if(!need_recompute_distances) + distance = dist_params->InverseExponentiateDistance(drp.distance); + else + distance = GetDistanceBetween(*dist_params, target_values, target_value_types, target_column_indices, drp.reference); + + distances_out[sorted_results.Size() - 1] = DistanceReferencePair(distance, drp.reference); + sorted_results.Pop(); + } +} + +void SeparableBoxFilterDataStore::FindNearestEntities(GeneralizedDistance &dist_params, std::vector &position_label_ids, + std::vector &position_values, std::vector &position_value_types, size_t top_k, + size_t ignore_entity_index, BitArrayIntegerSet &enabled_indices, std::vector> &distances_out, RandomStream rand_stream) +{ + if(top_k == 0 || GetNumInsertedEntities() == 0) + return; + + //look up these data structures upfront for performance + auto &target_column_indices = parametersAndBuffers.targetColumnIndices; + auto &target_values = parametersAndBuffers.targetValues; + auto &target_value_types = parametersAndBuffers.targetValueTypes; + PopulateTargetValuesAndLabelIndices(dist_params, position_label_ids, position_values, position_value_types); + + size_t num_enabled_features = target_values.size(); + if(num_enabled_features == 0) + return; + + PopulateUnknownFeatureValueTerms(dist_params); + + //ignore cases with missing labels + for(size_t i = 0; i < num_enabled_features; i++) + columnData[target_column_indices[i]]->invalidIndices.EraseInBatchFrom(enabled_indices); + enabled_indices.UpdateNumElements(); + + enabled_indices.erase(ignore_entity_index); + + //if num enabled indices < top_k, return sorted distances + if(enabled_indices.size() <= top_k) + return FindAllValidElementDistances(dist_params, target_column_indices, target_values, target_value_types, enabled_indices, distances_out, rand_stream); + + //one past the maximum entity index to be considered + size_t end_index = enabled_indices.GetEndInteger(); + + //reuse the appropriate partial_sums_buffer buffer + auto &partial_sums = parametersAndBuffers.partialSums; + partial_sums.ResizeAndClear(num_enabled_features, end_index); + + //calculate the partial sums for the cases that best match for each feature + // and populate the vectors of smallest possible distances that haven't been computed yet + auto &min_unpopulated_distances = parametersAndBuffers.minUnpopulatedDistances; + auto &min_distance_by_unpopulated_count = parametersAndBuffers.minDistanceByUnpopulatedCount; + PopulateInitialPartialSums(dist_params, top_k, num_enabled_features, enabled_indices, min_unpopulated_distances, min_distance_by_unpopulated_count); + + auto &potential_good_matches = parametersAndBuffers.potentialGoodMatches; + PopulatePotentialGoodMatches(potential_good_matches, enabled_indices, partial_sums, top_k); + + //reuse, clear, and set up sorted_results + auto &sorted_results = parametersAndBuffers.sortedResults; + sorted_results.clear(); + sorted_results.SetStream(rand_stream.CreateOtherStreamViaRand()); + sorted_results.Reserve(top_k); + + //parse the sparse inline hash of good match nodes directly into the compacted vector of good matches + while(potential_good_matches.size() > 0) + { + size_t good_match_index = 
potential_good_matches.top().reference; + potential_good_matches.pop(); + + //skip this entity in the next loops + enabled_indices.erase(good_match_index); + + double distance = ResolveDistanceToNonMatchTargetValues(dist_params,\ + target_column_indices, target_values, target_value_types, partial_sums, good_match_index, num_enabled_features); + sorted_results.Push(DistanceReferencePair(distance, good_match_index)); + } + + //if we did not find top_k results (search failed), attempt to randomly fill the top k with random results + // to remove biases that might slow down performance + while(sorted_results.Size() < top_k) + { + //find a random case index + size_t random_index = enabled_indices.GetRandomElement(rand_stream); + + //skip this entity in the next loops + enabled_indices.erase(random_index); + + double distance = ResolveDistanceToNonMatchTargetValues(dist_params, + target_column_indices, target_values, target_value_types, partial_sums, random_index, num_enabled_features); + sorted_results.Push(DistanceReferencePair(distance, random_index)); + } + + auto &previous_nn_cache = parametersAndBuffers.previousQueryNearestNeighbors; + + //have already gone through all records looking for top_k, if don't have top_k, then have exhausted search + if(sorted_results.Size() == top_k) + { + double worst_candidate_distance = sorted_results.Top().distance; + if(num_enabled_features > 1) + { + for(size_t entity_index : previous_nn_cache) + { + //only get its distance if it is enabled, + //but erase to skip this entity in the next loop + if(!enabled_indices.EraseAndRetrieve(entity_index)) + continue; + + auto [accept, distance] = ResolveDistanceToNonMatchTargetValues(dist_params, + target_column_indices, target_values, target_value_types, partial_sums, entity_index, + min_distance_by_unpopulated_count, num_enabled_features, worst_candidate_distance, min_unpopulated_distances); + + if(accept) + worst_candidate_distance = sorted_results.PushAndPop(DistanceReferencePair(distance, entity_index)).distance; + } + } + + //check to see if any features can have nulls quickly removed because it would push it past worst_candidate_distance + bool need_enabled_indices_recount = false; + for(size_t i = 0; i < num_enabled_features; i++) + { + //if the target_value is a null/nan, unknown-unknown differences have already been accounted for + //since they are partial matches + if(target_value_types[i] == ENIVT_NULL || (target_value_types[i] == ENIVT_NUMBER && FastIsNaN(target_values[i].number))) + continue; + + if(dist_params.ComputeDistanceTermKnownToUnknown(i) > worst_candidate_distance) + { + auto &column = columnData[target_column_indices[i]]; + auto &null_indices = column->nullIndices; + //make sure there's enough nulls to justify running through all of enabled_indices + if(null_indices.size() > 20) + { + null_indices.EraseInBatchFrom(enabled_indices); + need_enabled_indices_recount = true; + } + + auto &nan_indices = column->nanIndices; + //make sure there's enough nulls to justify running through all of enabled_indices + if(nan_indices.size() > 20) + { + nan_indices.EraseInBatchFrom(enabled_indices); + need_enabled_indices_recount = true; + } + } + } + if(need_enabled_indices_recount) + enabled_indices.UpdateNumElements(); + + //if have removed some from the end, reduce the range + end_index = enabled_indices.GetEndInteger(); + + //pick up where left off, already have top_k in sorted_results or are out of entities + #pragma omp parallel shared(worst_candidate_distance) if(end_index > 200) + { + //iterate over 
all indices + #pragma omp for schedule(static) + for(int64_t entity_index = 0; entity_index < static_cast(end_index); entity_index++) + { + //don't need to check maximum index, because already checked in loop + if(!enabled_indices.ContainsWithoutMaximumIndexCheck(entity_index)) + continue; + + auto [accept, distance] = ResolveDistanceToNonMatchTargetValues(dist_params, + target_column_indices, target_values, target_value_types, partial_sums, entity_index, + min_distance_by_unpopulated_count, num_enabled_features, worst_candidate_distance, min_unpopulated_distances); + + if(!accept) + continue; + + #ifdef _OPENMP + #pragma omp critical + { + //need to check again after going into critical section + if(distance <= worst_candidate_distance) + { + #endif + //computed the actual distance here, attempt to insert into final sorted results + worst_candidate_distance = sorted_results.PushAndPop(DistanceReferencePair(distance, entity_index)).distance; + + #ifdef _OPENMP + } + } + #endif + + } //for partialSums instances + } //#pragma omp parallel + + } // sorted_results.Size() == top_k + + //return and cache k nearest -- don't need to clear because the values will be clobbered + size_t num_results = sorted_results.Size(); + distances_out.resize(num_results); + previous_nn_cache.resize(num_results); + bool need_recompute_distances = (dist_params.recomputeAccurateDistances && !dist_params.highAccuracy); + if(need_recompute_distances) + dist_params.SetHighAccuracy(true); + + while(sorted_results.Size() > 0) + { + auto &drp = sorted_results.Top(); + double distance; + if(!need_recompute_distances) + distance = dist_params.InverseExponentiateDistance(drp.distance); + else + distance = GetDistanceBetween(dist_params, target_values, target_value_types, target_column_indices, drp.reference); + + size_t output_index = sorted_results.Size() - 1; + distances_out[output_index] = DistanceReferencePair(distance, drp.reference); + previous_nn_cache[output_index] = drp.reference; + + sorted_results.Pop(); + } +} diff --git a/src/Amalgam/SeparableBoxFilterDataStore.h b/src/Amalgam/SeparableBoxFilterDataStore.h new file mode 100644 index 00000000..054c975b --- /dev/null +++ b/src/Amalgam/SeparableBoxFilterDataStore.h @@ -0,0 +1,1078 @@ +#pragma once + +//------------------------------------------------------------------------------------------------------------------------------------- +//Seperable Box-Filter Data Store +//Spatial acceleration database for high-dimensional data with no constraints on metric space (Minkowski, Euclidean, LK, etc). +//The structure can efficiently search for data when using different metric space parameters without being rebuilt. 
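+//
+//Typical usage, roughly (a sketch only; argument lists are abbreviated here):
+//  SeparableBoxFilterDataStore sbfds;
+//  sbfds.AddLabels(label_ids, entities);           //build one column per label from existing entities
+//  sbfds.AddEntity(entity, entity_index);          //incrementally index an additional entity
+//  sbfds.FindNearestEntities(dist_params, ..., top_k, ..., distances_out, rand_stream);  //k-nearest query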
+//------------------------------------------------------------------------------------------------------------------------------------- + +//project headers: +#include "Concurrency.h" +#include "FastMath.h" +#include "Entity.h" +#include "EntityQueriesStatistics.h" +#include "EvaluableNode.h" +#include "IntegerSet.h" +#include "GeneralizedDistance.h" +#include "PartialSum.h" +#include "SBFDSColumnData.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +//supports cheap modification of: +//p-value, nominals, weights, distance accuracy, feature selections, case sub-selections +//requires minor updates for adding cases and features beyond initial dimensions +class SeparableBoxFilterDataStore +{ +public: + + //contains the parameters and buffers to perform find operations on the SBFDS + // for multithreading, there should be one of these per thread + struct SBFDSParametersAndBuffers + { + //buffers for finding nearest cases + std::vector targetValues; + std::vector targetValueTypes; + std::vector targetColumnIndices; + PartialSumCollection partialSums; + std::vector minUnpopulatedDistances; + std::vector minDistanceByUnpopulatedCount; + std::vector entityDistances; + + //when a local copy of distance params is needed + GeneralizedDistance distParams; + + BitArrayIntegerSet potentialMatchesSet; + BitArrayIntegerSet nonMatchesSet; + + std::vector> entitiesWithValues; + + FlexiblePriorityQueue> potentialGoodMatches; + StochasticTieBreakingPriorityQueue> sortedResults; + + //cache of nearest neighbors from previous query + std::vector previousQueryNearestNeighbors; + }; + + SeparableBoxFilterDataStore() + { + numEntities = 0; + } + + //Gets the maximum possible distance term from value + // absolute_feature_index is the offset to access the feature relative to the entire data store + // query_feature_index is relative to dist_params + inline double GetMaxDistanceTermFromValue(GeneralizedDistance &dist_params, + EvaluableNodeImmediateValue &value, EvaluableNodeImmediateValueType value_type, + size_t query_feature_index, size_t absolute_feature_index) + { + if(dist_params.IsFeatureNominal(query_feature_index)) + return dist_params.ComputeDistanceTermNominalNonMatch(query_feature_index); + + double max_diff = columnData[absolute_feature_index]->GetMaxDifferenceTermFromValue( + dist_params.featureParams[query_feature_index], value_type, value); + return dist_params.ComputeDistanceTermNonNominalNonNullRegular(max_diff, query_feature_index); + } + + //gets the matrix cell index for the specified index + __forceinline const size_t GetMatrixCellIndex(size_t entity_index) + { + return entity_index * columnData.size(); + } + + //returns the the element at index's value for the specified column at column_index, requires valid index + __forceinline EvaluableNodeImmediateValue &GetValue(size_t index, size_t column_index) + { + return matrix[index * columnData.size() + column_index]; + } + + //returns the column index for the label_id, or maximum value if not found + inline size_t GetColumnIndexFromLabelId(size_t label_id) + { + auto column = labelIdToColumnIndex.find(label_id); + if(column == end(labelIdToColumnIndex)) + return std::numeric_limits::max(); + return column->second; + } + + //returns true if the structure already has the label + inline bool DoesHaveLabel(size_t label_id) + { + return (labelIdToColumnIndex.count(label_id) > 0); + } + + //populates the matrix with the label and builds column data + // assumes column data is empty + void 
BuildLabel(size_t column_index, const std::vector &entities) + { + auto &column_data = columnData[column_index]; + auto label_id = column_data->stringId; + + auto &entities_with_number_values = parametersAndBuffers.entitiesWithValues; + entities_with_number_values.clear(); + + //populate matrix and get values + // maintaining the order of insertion of the entities from smallest to largest allows for better performance of the insertions + // and every function called here assumes that entities are inserted in increasing order + for(size_t entity_index = 0; entity_index < entities.size(); entity_index++) + { + EvaluableNodeImmediateValueType value_type; + EvaluableNodeImmediateValue value; + value_type = entities[entity_index]->GetValueAtLabelAsImmediateValue(label_id, value); + matrix[GetMatrixCellIndex(entity_index) + column_index] = value; + + column_data->InsertNextIndexValueExceptNumbers(value_type, value, entity_index, entities_with_number_values); + } + + //sort the number values for efficient insertion, but keep the entities in their order + std::stable_sort(begin(entities_with_number_values), end(entities_with_number_values)); + + column_data->AppendSortedNumberIndicesWithSortedIndices(entities_with_number_values); + } + + //expand the structure by adding a new column/label/feature and populating with data from entities + void AddLabels(std::vector &label_ids, const std::vector &entities) + { + //make sure have data to add + if(label_ids.size() == 0 || entities.size() == 0) + return; + + //resize the matrix and populate column and label_id lookups + size_t num_columns_added = AddLabelsAsEmptyColumns(label_ids, entities.size()); + + size_t num_columns = columnData.size(); + size_t num_previous_columns = columnData.size() - num_columns_added; + + #ifdef MULTITHREAD_SUPPORT + //if big enough (enough entities and/or enough columns), try to use multithreading + if(num_columns_added > 1 && (numEntities > 10000 || (numEntities > 200 && num_columns_added > 10))) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + std::vector> columns_completed; + columns_completed.reserve(num_columns); + + for(size_t i = num_previous_columns; i < num_columns; i++) + { + columns_completed.emplace_back( + Concurrency::threadPool.EnqueueBatchTask([this, &entities, i]() { BuildLabel(i, entities); }) + ); + } + + enqueue_task_lock.Unlock(); + Concurrency::threadPool.CountCurrentThreadAsPaused(); + + for(auto &future : columns_completed) + future.wait(); + + Concurrency::threadPool.CountCurrentThreadAsResumed(); + + return; + } + } + //not running concurrently + #endif + + for(size_t i = num_previous_columns; i < num_columns; i++) + BuildLabel(i, entities); + } + + //returns true only if none of the entities have the label + inline bool IsColumnIndexRemovable(size_t column_index_to_remove) + { + //removable only if have no values; every entity is invalid + return (columnData[column_index_to_remove]->invalidIndices.size() == GetNumInsertedEntities()); + } + + //removes a column from the database + void RemoveColumnIndex(size_t column_index_to_remove); + + //finds any columns / labels that are no longer used by any entity and removes them + inline void RemoveAnyUnusedLabels() + { + //column_index is one greater than the actual index to keep it above zero + // work from high column indices to low for performance and because removal swaps + // the last column into the current column's place, so don't need to recheck the index or update 
the indices + for(size_t column_index = columnData.size(); column_index > 0; column_index--) + { + if(IsColumnIndexRemovable(column_index - 1)) + RemoveColumnIndex(column_index - 1); + } + } + + //adds an entity to the database + inline void AddEntity(Entity *entity, size_t entity_index) + { + size_t starting_cell_index = GetMatrixCellIndex(entity_index); + + //fill with missing values, including any empty indices + matrix.resize(starting_cell_index + columnData.size()); + + //fill in matrix cells from entity + size_t cell_index = starting_cell_index; + for(size_t column_index = 0; column_index < columnData.size(); column_index++, cell_index++) + { + EvaluableNodeImmediateValueType value_type; + EvaluableNodeImmediateValue value; + value_type = entity->GetValueAtLabelAsImmediateValue(columnData[column_index]->stringId, value); + + matrix[cell_index] = value; + + columnData[column_index]->InsertIndexValue(value_type, value, entity_index); + } + + //count this entity + if(entity_index >= numEntities) + numEntities = entity_index + 1; + } + + //removes an entity to the database using an incremental update scheme + inline void RemoveEntity(Entity *entity, size_t entity_index, size_t entity_index_to_reassign) + { + if(entity_index >= numEntities || columnData.size() == 0) + return; + + //if was the last entity and reassigning the last one or one out of bounds, + // simply delete from column data, delete last row, and return + if(entity_index + 1 == GetNumInsertedEntities() && entity_index_to_reassign >= entity_index) + { + DeleteEntityIndexFromColumns(entity_index); + DeleteLastRow(); + return; + } + + //make sure it's a valid rassignment + if(entity_index_to_reassign >= numEntities) + return; + + //if deleting a row and not replacing it, just fill as if it has no data + if(entity_index == entity_index_to_reassign) + { + DeleteEntityIndexFromColumns(entity_index); + + //fill with missing values + size_t starting_cell_index = GetMatrixCellIndex(entity_index); + for(size_t column_index = 0; column_index < columnData.size(); column_index++) + matrix[starting_cell_index + column_index].number = std::numeric_limits::quiet_NaN(); + return; + } + + //reassign index for each column + for(size_t column_index = 0; column_index < columnData.size(); column_index++) + { + auto &val_to_overwrite = GetValue(entity_index, column_index); + auto &value_of_index_to_reassign = GetValue(entity_index_to_reassign, column_index); + auto value_type_to_reassign = columnData[column_index]->GetIndexValueType(entity_index_to_reassign); + + //remove the value where it is + columnData[column_index]->DeleteIndexValue(value_of_index_to_reassign, entity_index_to_reassign); + + //change the destination to the value + columnData[column_index]->ChangeIndexValue(val_to_overwrite, value_type_to_reassign, value_of_index_to_reassign, entity_index); + } + + //copy data from entity_index_to_reassign to entity_index + memcpy((char *)&(matrix[entity_index * columnData.size()]), (char *)&(matrix[entity_index_to_reassign * columnData.size()]), sizeof(EvaluableNodeImmediateValue) * columnData.size()); + + //truncate matrix cache if removing the last entry, either by moving the last entity or by directly removing the last + if(entity_index_to_reassign + 1 == numEntities + || (entity_index_to_reassign + 1 >= numEntities && entity_index + 1 == numEntities)) + DeleteLastRow(); + + //clean up any labels that aren't relevant + RemoveAnyUnusedLabels(); + } + + //updates all of the label values for entity with index entity_index + inline void 
UpdateAllEntityLabels(Entity *entity, size_t entity_index) + { + if(entity_index >= numEntities) + return; + + size_t matrix_index = GetMatrixCellIndex(entity_index); + for(size_t column_index = 0; column_index < columnData.size(); column_index++) + { + EvaluableNodeImmediateValueType value_type; + EvaluableNodeImmediateValue value; + value_type = entity->GetValueAtLabelAsImmediateValue(columnData[column_index]->stringId, value); + + columnData[column_index]->ChangeIndexValue(matrix[matrix_index], value_type, value, entity_index); + matrix[matrix_index] = value; + + matrix_index++; + } + + //clean up any labels that aren't relevant + RemoveAnyUnusedLabels(); + } + + //like UpdateAllEntityLabels, but only updates labels for label_updated + inline void UpdateEntityLabel(Entity *entity, size_t entity_index, StringInternPool::StringID label_updated) + { + if(entity_index >= numEntities) + return; + + //find the column + auto column = labelIdToColumnIndex.find(label_updated); + if(column == end(labelIdToColumnIndex)) + return; + size_t column_index = column->second; + + //get the new value + EvaluableNodeImmediateValueType value_type; + EvaluableNodeImmediateValue value; + value_type = entity->GetValueAtLabelAsImmediateValue(columnData[column_index]->stringId, value); + + //update the value + auto &matrix_value = GetValue(entity_index, column_index); + columnData[column_index]->ChangeIndexValue(matrix_value, value_type, value, entity_index); + matrix_value = value; + + //remove the label if no longer relevant + if(IsColumnIndexRemovable(column_index)) + RemoveColumnIndex(column_index); + } + + constexpr size_t GetNumInsertedEntities() + { + return numEntities; + } + + //returns a reference to the BitArrayIntegerSet corresponding to the entities with numbers for column_index + inline EfficientIntegerSet &GetEntitiesWithValidNumbers(size_t column_index) + { + return columnData[column_index]->numberIndices; + } + + //returns a reference to the BitArrayIntegerSet corresponding to the entities with strings ids for column_index + inline EfficientIntegerSet &GetEntitiesWithValidStringIds(size_t column_index) + { + return columnData[column_index]->stringIdIndices; + } + + //given a feature_id and a range [low, high], fills out with all the entities with values of feature feature_id within specified range + //Note about Null/NaNs: + //if the feature value is Nan/Null, it will NOT be present in the search results, ie "x" != 3 will NOT include elements with x is nan/Null, even though nan/null != 3 + inline void FindAllEntitiesWithinRange(size_t feature_id, EvaluableNodeImmediateValueType value_type, + EvaluableNodeImmediateValue &low, EvaluableNodeImmediateValue &high, BitArrayIntegerSet &out, bool between_values = true) + { + if(numEntities == 0) + { + out.clear(); + return; + } + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + { + out.clear(); + return; + } + + columnData[column->second]->FindAllIndicesWithinRange(value_type, low, high, out, between_values); + } + + //sets out to include only entities that have the given feature + inline void FindAllEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out) + { + if(numEntities == 0) + { + out.clear(); + return; + } + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + { + out.clear(); + return; + } + + columnData[column->second]->invalidIndices.NotTo(out, GetNumInsertedEntities()); + } + + //filters out to include only entities that have the given 
feature + inline void IntersectEntitiesWithFeature(size_t feature_id, BitArrayIntegerSet &out) + { + if(numEntities == 0) + { + out.clear(); + return; + } + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + { + out.clear(); + return; + } + + columnData[column->second]->invalidIndices.EraseTo(out); + } + + //sets out to include only entities that have the given feature and records the values into + // entities and values respectively. enabled_entities is used as a buffer + inline void FindAllEntitiesWithValidNumbers(size_t feature_id, BitArrayIntegerSet &enabled_entities, + std::vector &entities, std::vector &values) + { + if(numEntities == 0) + return; + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + return; + size_t column_index = column->second; + + columnData[column_index]->numberIndices.CopyTo(enabled_entities); + columnData[column_index]->nanIndices.EraseTo(enabled_entities); + + //resize buffers and place each entity and value into its respective buffer + entities.resize(enabled_entities.size()); + values.resize(enabled_entities.size()); + size_t index = 0; + for(auto entity_index : enabled_entities) + { + entities[index] = entity_index; + values[index] = GetValue(entity_index, column_index).number; + index++; + } + } + + //filters enabled_indices to include only entities that have the given feature + // records the entities into entities and values respectively + inline void IntersectEntitiesWithValidNumbers(size_t feature_id, BitArrayIntegerSet &enabled_entities, + std::vector &entities, std::vector &values) + { + if(numEntities == 0) + return; + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + return; + size_t column_index = column->second; + + columnData[column_index]->numberIndices.IntersectTo(enabled_entities); + columnData[column_index]->nanIndices.EraseTo(enabled_entities); + + //resize buffers and place each entity and value into its respective buffer + entities.resize(enabled_entities.size()); + values.resize(enabled_entities.size()); + size_t index = 0; + for(auto entity_index : enabled_entities) + { + entities[index] = entity_index; + values[index] = GetValue(entity_index, column_index).number; + index++; + } + } + + //sets out to include only entities that don't have the given feature + inline void FindAllEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out) + { + if(numEntities == 0) + { + out.clear(); + return; + } + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + { + out.clear(); + return; + } + + columnData[column->second]->invalidIndices.CopyTo(out); + } + + //filters out to include only entities that don't have the given feature + inline void IntersectEntitiesWithoutFeature(size_t feature_id, BitArrayIntegerSet &out) + { + if(numEntities == 0) + return; + + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + return; + + columnData[column->second]->invalidIndices.IntersectTo(out); + } + + //given a feature_id, value_type, and value, inserts into out all the entities that have the value + inline void UnionAllEntitiesWithValue(size_t feature_id, + EvaluableNodeImmediateValueType value_type, EvaluableNodeImmediateValue &value, BitArrayIntegerSet &out) + { + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + return; + size_t column_index = column->second; + + 
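+		//non-code values can be unioned via the column's value indices;
+		// code values have no per-value index, so each stored code is compared by deep equality below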
if(value_type != ENIVT_CODE) + { + columnData[column_index]->UnionAllIndicesWithValue(value_type, value, out); + } + else //compare if code is equal + { + for(auto entity_index : columnData[column_index]->codeIndices) + { + if(EvaluableNode::AreDeepEqual(value.code, GetValue(entity_index, column_index).code)) + out.insert(entity_index); + } + } + } + + //Finds the Minimum or Maximum (with respect to feature_id feature value) num_to_find entities in the database; if is_max is true, finds max, else finds min + inline void FindMinMax(size_t feature_id, + EvaluableNodeImmediateValueType value_type, size_t num_to_find, bool is_max, + BitArrayIntegerSet *enabled_indices, BitArrayIntegerSet &out) + { + auto column = labelIdToColumnIndex.find(feature_id); + if(column == labelIdToColumnIndex.end()) + return; + + columnData[column->second]->FindMinMax(value_type, num_to_find, is_max, enabled_indices, out); + } + + //returns the number of unique values for a column for the given value_type + size_t GetNumUniqueValuesForColumn(size_t column_index, EvaluableNodeImmediateValueType value_type) + { + auto &column_data = columnData[column_index]; + if(value_type == ENIVT_NUMBER) + return column_data->numberIndices.size(); + else if(value_type == ENIVT_STRING_ID) + return column_data->stringIdIndices.size(); + else //return everything else + return GetNumInsertedEntities() - column_data->invalidIndices.size(); + } + + //returns a function that will take in an entity index iterator and reference to a double to store the value and return true if the value is found + // assumes and requires column_index is a valid column (not a feature_id) + template + inline std::function GetNumberValueFromEntityIteratorFunction(size_t column_index) + { + auto number_indices_ptr = &columnData[column_index]->numberIndices; + + return [&, number_indices_ptr, column_index] + (Iter i, double &value) + { + size_t entity_index = *i; + if(!number_indices_ptr->contains(entity_index)) + return false; + + value = GetValue(entity_index, column_index).number; + return true; + }; + } + + //returns a function that will take in an entity index and reference to a double to store the value and return true if the value is found + // assumes and requires column_index is a valid column (not a feature_id) + inline std::function GetNumberValueFromEntityIndexFunction(size_t column_index) + { + //if invalid column_index, then always return false + if(column_index >= columnData.size()) + return [](size_t i, double &value) { return false; }; + + auto number_indices_ptr = &columnData[column_index]->numberIndices; + + return [&, number_indices_ptr, column_index] + (size_t i, double &value) + { + if(!number_indices_ptr->contains(i)) + return false; + + value = GetValue(i, column_index).number; + return true; + }; + } + + //returns a function that will take in an entity index iterator and reference to a string id to store the value and return true if the value is found + // assumes and requires column_index is a valid column (not a feature_id) + template + inline std::function GetStringIdValueFromEntityIteratorFunction(size_t column_index) + { + auto string_indices_ptr = &columnData[column_index]->stringIdIndices; + + return [&, string_indices_ptr, column_index] + (Iter i, StringInternPool::StringID &value) + { + size_t entity_index = *i; + if(!string_indices_ptr->contains(entity_index)) + return false; + + value = GetValue(entity_index, column_index).stringID; + return true; + }; + } + + //populates distances_out with all entities and their distances 
that have a distance to target less than max_dist + //if enabled_indices is not nullptr, intersects with the enabled_indices set. + void FindEntitiesWithinDistance(GeneralizedDistance &dist_params, std::vector &position_label_ids, + std::vector &position_values, std::vector &position_value_types, + double max_dist, BitArrayIntegerSet &enabled_indices, std::vector> &distances_out); + + //Finds the top_k nearest neighbors results to the entity at search_index. + // if expand_to_first_nonzero_distance is set, then it will expand top_k until it it finds the first nonzero distance or until it includes all enabled indices + // if const_dist_params is true, then it will make a copy before making any modifications + //will not modify enabled_indices, but instead will make a copy for any modifications + void FindEntitiesNearestToIndexedEntity(GeneralizedDistance *dist_params_ref, std::vector &position_label_ids, + bool constant_dist_params, size_t search_index, size_t top_k, BitArrayIntegerSet &enabled_indices, + bool expand_to_first_nonzero_distance, std::vector> &distances_out, + size_t ignore_index = std::numeric_limits::max(), RandomStream rand_stream = RandomStream()); + + //Finds the nearest neighbors + //enabled_indices is the set of entities to find from, and will be modified + void FindNearestEntities(GeneralizedDistance &dist_params, std::vector &position_label_ids, + std::vector &position_values, std::vector &position_value_types, + size_t top_k, size_t ignore_entity_index, BitArrayIntegerSet &enabled_indices, + std::vector> &distances_out, RandomStream rand_stream = RandomStream()); + +protected: + + //deletes/pops off the last row in the matrix cache + inline void DeleteLastRow() + { + if(matrix.size() == 0) + return; + + //truncate matrix cache + numEntities--; + matrix.resize(matrix.size() - columnData.size()); + } + + //deletes the index and associated data + void DeleteEntityIndexFromColumns(size_t index); + + //adds a new labels to the database, populating new cells with -NaN, and updating the number of entities + // assumes label_ids is not empty and num_entities is nonzero + //returns the number of new columns inserted + size_t AddLabelsAsEmptyColumns(std::vector &label_ids, size_t num_entities); + + //computes each partial sum and adds the term to the partial sums associated for each id in entity_indices for query_feature_index + //returns the number of entities indices accumulated + size_t ComputeAndAccumulatePartialSums(GeneralizedDistance &dist_params, + EvaluableNodeImmediateValue value, EvaluableNodeImmediateValueType value_type, + SortedIntegerSet &entity_indices, size_t query_feature_index, size_t absolute_feature_index) + { + size_t num_entity_indices = entity_indices.size(); + + auto &partial_sums = parametersAndBuffers.partialSums; + const auto accum_location = partial_sums.GetAccumLocation(query_feature_index); + + //for each found element, accumulate associated partial sums + for(size_t entity_index : entity_indices) + { + //get value + auto &other_value = GetValue(entity_index, absolute_feature_index); + auto other_value_type = columnData[absolute_feature_index]->GetIndexValueType(entity_index); + + //compute term + double term = dist_params.ComputeDistanceTermRegular(value, other_value, value_type, other_value_type, query_feature_index); + + //accumulate + partial_sums.Accum(entity_index, accum_location, term); + } + + return num_entity_indices; + } + + //adds term to the partial sums associated for each id in entity_indices for query_feature_index + //returns the 
number of entities indices accumulated + inline size_t AccumulatePartialSums(SortedIntegerSet &entity_indices, size_t query_feature_index, double term) + { + size_t num_entity_indices = entity_indices.size(); + + auto &partial_sums = parametersAndBuffers.partialSums; + const auto accum_location = partial_sums.GetAccumLocation(query_feature_index); + size_t max_element = partial_sums.numInstances; + + auto &entity_indices_vector = entity_indices.GetIntegerVector(); + + //it's almost always faster to just accumulate an index than to check if it is a valid index + // and then only accumulate if it is valid + //however, indices beyond the range of partial_sums will cause an issue + //therefore, only trim back the end if needed, and trim back to the largest possible element id (max_element - 1) + if(entity_indices.GetEndInteger() >= max_element) + num_entity_indices = entity_indices.GetFirstIntegerVectorLocationGreaterThan(max_element - 1); + + //for each found element, accumulate associated partial sums, or if zero, just mark that it's accumulated + if(term != 0.0) + { + #pragma omp parallel for schedule(static) if(num_entity_indices > 300) + for(int64_t i = 0; i < static_cast(num_entity_indices); i++) + { + const auto entity_index = entity_indices_vector[i]; + partial_sums.Accum(entity_index, accum_location, term); + } + } + else //term == 0.0 + { + #pragma omp parallel for schedule(static) if(num_entity_indices > 300) + for(int64_t i = 0; i < static_cast(num_entity_indices); i++) + { + const auto entity_index = entity_indices_vector[i]; + partial_sums.AccumZero(entity_index, accum_location); + } + } + + return num_entity_indices; + } + + //adds term to the partial sums associated for each id in entity_indices for query_feature_index + //returns the number of entities indices accumulated + inline size_t AccumulatePartialSums(BitArrayIntegerSet &entity_indices, size_t query_feature_index, double term) + { + size_t num_entity_indices = entity_indices.size(); + if(num_entity_indices == 0) + return 0; + + auto &partial_sums = parametersAndBuffers.partialSums; + const auto accum_location = partial_sums.GetAccumLocation(query_feature_index); + size_t max_element = partial_sums.numInstances; + + if(term != 0.0) + { + entity_indices.IterateOver( + [&partial_sums, &accum_location, term] + (size_t entity_index) + { + partial_sums.Accum(entity_index, accum_location, term); + }, + max_element); + } + else + { + entity_indices.IterateOver( + [&partial_sums, &accum_location] + (size_t entity_index) + { + partial_sums.AccumZero(entity_index, accum_location); + }, + max_element); + } + + return entity_indices.size(); + } + + //adds term to the partial sums associated for each id in entity_indices for query_feature_index + //returns the number of entities indices accumulated + inline size_t AccumulatePartialSums(EfficientIntegerSet &entity_indices, size_t query_feature_index, double term) + { + if(entity_indices.IsSisContainer()) + return AccumulatePartialSums(entity_indices.GetSisContainer(), query_feature_index, term); + else + return AccumulatePartialSums(entity_indices.GetBaisContainer(), query_feature_index, term); + } + + //search a projection width in terms of bucket count or number of collected entities + //accumulates partial sums + //searches until num_entities_to_populate are popluated or other heuristics have been reached + //will only consider indices in enabled_indiced + // absolute_feature_index is the offset to access the feature relative to the entire data store + // query_feature_index is 
the offset to access the feature relative to the particular query data parameters + //returns the smallest partial sum for any value not yet computed + double PopulatePartialSumsWithSimilarFeatureValue(GeneralizedDistance &dist_params, + EvaluableNodeImmediateValue value, EvaluableNodeImmediateValueType value_type, + size_t num_entities_to_populate, bool expand_search_if_optimal, + size_t query_feature_index, size_t absolute_feature_index, BitArrayIntegerSet &enabled_indices); + + //computes a heuristically derived set of partial sums across all the enabled features from parametersAndBuffers.targetValues[i] and parametersAndBuffers.targetColumnIndices[i] + // if enabled_indices is not nullptr, then will only use elements in that list + // uses top_k for heuristics as to how many partial sums to compute + // will compute and populate min_unpopulated_distances and min_distance_by_unpopulated_count, where the former is the next smallest uncomputed feature distance indexed by the number of features not computed + // and min_distance_by_unpopulated_count is the total distance of all uncomputed features where the index is the number of uncomputed features + void PopulateInitialPartialSums(GeneralizedDistance &dist_params, size_t top_k, size_t num_enabled_features, BitArrayIntegerSet &enabled_indices, + std::vector &min_unpopulated_distances, std::vector &min_distance_by_unpopulated_count); + + void PopulatePotentialGoodMatches(FlexiblePriorityQueue> &potential_good_matches, + BitArrayIntegerSet &enabled_indices, PartialSumCollection &partial_sums, size_t top_k); + + //returns the distance between two nodes while respecting the feature mask + inline double GetDistanceBetween(GeneralizedDistance &dist_params, + std::vector &target_values, std::vector &target_value_types, + std::vector &target_column_indices, size_t other_index) + { + const size_t matrix_base_position = other_index * columnData.size(); + + double dist_accum = 0.0; + for(size_t i = 0; i < target_values.size(); i++) + { + if(dist_params.IsFeatureEnabled(i)) + { + size_t column_index = target_column_indices[i]; + auto &other_value = matrix[matrix_base_position + column_index]; + auto other_value_type = columnData[column_index]->GetIndexValueType(other_index); + + dist_accum += dist_params.ComputeDistanceTermRegular(target_values[i], other_value, target_value_types[i], other_value_type, i); + } + } + + double dist = dist_params.InverseExponentiateDistance(dist_accum); + return dist; + } + + //computes the distance term for the entity, query_feature_index, and feature_type, + // where the value does not match any in the SBFDS + //assumes that null values have already been taken care of for nominals + __forceinline double ComputeDistanceTermNonMatch(GeneralizedDistance &dist_params, std::vector &target_label_indices, + std::vector &target_values, std::vector &target_value_types, + size_t entity_index, size_t query_feature_index) + { + auto feature_type = dist_params.featureParams[query_feature_index].featureType; + + if(feature_type == FDT_NOMINAL) + return dist_params.ComputeDistanceTermNominalNonMatch(query_feature_index); + else + { + const size_t column_index = target_label_indices[query_feature_index]; + + if(feature_type == FDT_CONTINUOUS_UNIVERSALLY_NUMERIC) + { + return dist_params.ComputeDistanceTermNonNominalNonCyclicOneNonNullRegular(target_values[query_feature_index].number - GetValue(entity_index, column_index).number, query_feature_index); + } + else if(feature_type == FDT_CONTINUOUS_NUMERIC) + { + auto &column_data = 
columnData[column_index]; + if(column_data->numberIndices.contains(entity_index)) + return dist_params.ComputeDistanceTermNonNominalNonCyclicOneNonNullRegular(target_values[query_feature_index].number - GetValue(entity_index, column_index).number, query_feature_index); + else + return dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + } + else if(feature_type == FDT_CONTINUOUS_NUMERIC_CYCLIC) + { + auto &column_data = columnData[column_index]; + if(column_data->numberIndices.contains(entity_index)) + return dist_params.ComputeDistanceTermNonNominalOneNonNullRegular(target_values[query_feature_index].number - GetValue(entity_index, column_index).number, query_feature_index); + else + return dist_params.ComputeDistanceTermKnownToUnknown(query_feature_index); + } + else //feature_type == FDT_CONTINUOUS_CODE + { + auto &other_value = GetValue(entity_index, column_index); + auto other_value_type = columnData[column_index]->GetIndexValueType(entity_index); + + return dist_params.ComputeDistanceTermRegular(target_values[query_feature_index], other_value, target_value_types[query_feature_index], other_value_type, query_feature_index); + } + } + } + + //given an estimate of distance that uses best_possible_feature_distance filled in for any features not computed, + // this function iterates over the partial sums indices, replacing each uncomputed feature with the actual distance for that feature + //returns the distance + //assumes that all features that are exact matches have already been computed + __forceinline double ResolveDistanceToNonMatchTargetValues(GeneralizedDistance &dist_params, std::vector &target_label_indices, + std::vector &target_values, std::vector &target_value_types, + PartialSumCollection &partial_sums, size_t entity_index, size_t num_target_labels) + { + //calculate full non-exponentiated Minkowski distance to the target + double distance = partial_sums.GetSum(entity_index); + + for(auto it = partial_sums.BeginPartialSumIndex(entity_index); *it < num_target_labels; ++it) + { + if(it.IsIndexComputed()) + continue; + + size_t query_feature_index = *it; + distance += ComputeDistanceTermNonMatch(dist_params, target_label_indices, target_values, target_value_types, + entity_index, query_feature_index); + } + + return distance; + } + + //given an estimate of distance that uses best_possible_feature_distance filled in for any features not computed, + // this function iterates over the partial sums indices, replacing each uncomputed feature with the actual distance for that feature + // if the distance ever exceeds reject_distance, then the resolving will stop early + // if reject_distance is infinite, then it will just complete the distance terms + //returns a pair of a boolean and the distance. 
if the boolean is true, then the distance is less than or equal to the reject distance + //assumes that all features that are exact matches have already been computed + __forceinline std::pair ResolveDistanceToNonMatchTargetValues(GeneralizedDistance &dist_params, std::vector &target_label_indices, + std::vector &target_values, std::vector &target_value_types, + PartialSumCollection &partial_sums, size_t entity_index, std::vector &min_distance_by_unpopulated_count, size_t num_features, + double reject_distance, std::vector &min_unpopulated_distances) + { + auto [num_calculated_features, distance] = partial_sums.GetNumFilledAndSum(entity_index); + + //complete known sums with worst and best possibilities + //calculate the number of features for which the minkowski distance term has not yet been calculated + size_t num_uncalculated_features = (num_features - num_calculated_features); + //if have already calculated everything, then already have the distance + if(num_uncalculated_features == 0) + return std::make_pair(distance <= reject_distance, distance); + + //if too far out, reject immediately + distance += min_distance_by_unpopulated_count[num_uncalculated_features]; + if(distance > reject_distance) + return std::make_pair(false, distance); + + //use infinite loop with exit at the end to remove need for extra iterator increment + for(auto it = partial_sums.BeginPartialSumIndex(entity_index); true; ++it) + { + if(it.IsIndexComputed()) + continue; + + //remove distance already added and reduce num_uncalculated_partial_sum_features + distance -= min_unpopulated_distances[--num_uncalculated_features]; + + const size_t query_feature_index = *it; + distance += ComputeDistanceTermNonMatch(dist_params, target_label_indices, target_values, target_value_types, + entity_index, query_feature_index); + + //break out of the loop before the iterator is incremented to save a few cycles + if(distance > reject_distance) + return std::make_pair(false, distance); + + if(num_uncalculated_features == 0) + break; + } + + //done with computation + return std::make_pair(true, distance); + } + + //populates the next target attribute in each vector based on column_index, position data, and mkdist_feature_type + // if mkdist_feature_type can be modified for efficiency, this function will update it, which is why it is passed by reference + __forceinline void PopulateNextTargetAttributes(GeneralizedDistance &dist_params, + std::vector &target_column_indices, std::vector &target_values, + std::vector &target_value_types, size_t column_index, + EvaluableNodeImmediateValue &position_value, EvaluableNodeImmediateValueType position_value_type, + FeatureDifferenceType &mkdist_feature_type) + { + target_column_indices.push_back(column_index); + + if(mkdist_feature_type == FDT_NOMINAL || mkdist_feature_type == FDT_CONTINUOUS_STRING || mkdist_feature_type == FDT_CONTINUOUS_CODE) + { + target_values.push_back(position_value); + target_value_types.push_back(position_value_type); + } + else // mkdist_feature_type == FDT_CONTINUOUS_NUMERIC or FDT_CONTINUOUS_NUMERIC_CYCLIC + { + //if everything is either non-existant or numeric, then can shortcut later + auto &column_data = columnData[column_index]; + size_t num_values_stored_as_numbers = column_data->numberIndices.size() + column_data->invalidIndices.size() + column_data->nullIndices.size(); + if(GetNumInsertedEntities() == num_values_stored_as_numbers && mkdist_feature_type == FDT_CONTINUOUS_NUMERIC) + mkdist_feature_type = FDT_CONTINUOUS_UNIVERSALLY_NUMERIC; + + auto value_type 
= position_value_type; + if(value_type == ENIVT_NUMBER) + { + target_values.push_back(position_value); + target_value_types.push_back(ENIVT_NUMBER); + } + else //looking for continuous and not a number, so just put as nan + { + target_values.push_back(std::numeric_limits::quiet_NaN()); + target_value_types.push_back(ENIVT_NUMBER); + } + } + } + + //populates targetValues and targetColumnIndices given the selected target values for each value in corresponding position* parameters + inline void PopulateTargetValuesAndLabelIndices(GeneralizedDistance &dist_params, + std::vector &position_label_ids, std::vector &position_values, + std::vector &position_value_types) + { + //setup target values + auto &target_values = parametersAndBuffers.targetValues; + target_values.clear(); + + auto &target_value_types = parametersAndBuffers.targetValueTypes; + target_value_types.clear(); + + auto &target_column_indices = parametersAndBuffers.targetColumnIndices; + target_column_indices.clear(); + + for(size_t i = 0; i < position_label_ids.size(); i++) + { + auto column = labelIdToColumnIndex.find(position_label_ids[i]); + if(column == end(labelIdToColumnIndex)) + continue; + + if(dist_params.IsFeatureEnabled(i)) + { + PopulateNextTargetAttributes(dist_params, + target_column_indices, target_values, target_value_types, + column->second, position_values[i], position_value_types[i], + dist_params.featureParams[i].featureType); + } + } + } + + //recomputes feature gaps and computes parametersAndBuffers.maxFeatureGaps + // returns the smallest of the maximum feature gaps among the features + inline void PopulateUnknownFeatureValueTerms(GeneralizedDistance &dist_params) + { + auto &target_column_indices = parametersAndBuffers.targetColumnIndices; + auto &target_values = parametersAndBuffers.targetValues; + auto &target_value_types = parametersAndBuffers.targetValueTypes; + + for(size_t i = 0; i < target_column_indices.size(); i++) + { + auto &feature_params = dist_params.featureParams[i]; + size_t column_index = target_column_indices[i]; + + //if either known or unknown to unknown is missing, need to compute difference + // and store it where it is needed + double unknown_distance_term = 0.0; + if(FastIsNaN(feature_params.knownToUnknownDifference) + || FastIsNaN(feature_params.unknownToUnknownDifference)) + { + unknown_distance_term = columnData[column_index]->GetMaxDifferenceTermFromValue( + feature_params, target_value_types[i], target_values[i]); + + if(FastIsNaN(feature_params.knownToUnknownDifference)) + feature_params.knownToUnknownDifference = unknown_distance_term; + if(FastIsNaN(feature_params.unknownToUnknownDifference)) + feature_params.unknownToUnknownDifference = unknown_distance_term; + } + + dist_params.ComputeAndStoreUncertaintyDistanceTerms(i); + } + } + + //returns all elements in the database that yield valid distances along with their sorted distances to the values for entity + // at target_index, optionally limits results count to k + inline void FindAllValidElementDistances(GeneralizedDistance &dist_params, std::vector &target_column_indices, + std::vector &target_values, std::vector &target_value_types, + BitArrayIntegerSet &valid_indices, std::vector> &distances_out, RandomStream rand_stream) + { + auto &sorted_results = parametersAndBuffers.sortedResults; + sorted_results.clear(); + sorted_results.SetStream(rand_stream); + + dist_params.SetHighAccuracy(dist_params.highAccuracy || dist_params.recomputeAccurateDistances); + + for(auto index : valid_indices) + { + double distance = 
GetDistanceBetween(dist_params, target_values, target_value_types, target_column_indices, index); + distances_out.emplace_back(distance, index); + } + + std::sort(begin(distances_out), end(distances_out)); + } + + //contains entity lookups for each of the values for each of the columns + std::vector> columnData; + + //for multithreading, there should be one of these per thread +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + thread_local +#endif + static SBFDSParametersAndBuffers parametersAndBuffers; + + //map from label id to column index in the matrix + FastHashMap labelIdToColumnIndex; + + //matrix of cases (rows) * features (columns) + std::vector matrix; + + //the number of entities in the data store; all indices below this value are populated + size_t numEntities; +}; diff --git a/src/Amalgam/ThreadPool.cpp b/src/Amalgam/ThreadPool.cpp new file mode 100644 index 00000000..d7d66a43 --- /dev/null +++ b/src/Amalgam/ThreadPool.cpp @@ -0,0 +1,102 @@ +//project headers: +#include "ThreadPool.h" + +ThreadPool::ThreadPool(size_t max_num_threads) +{ + shutdownThreads = false; + ChangeThreadPoolSize(max_num_threads); + + //there must be one active thread + numActiveThreads = 1; + + mainThreadId = std::this_thread::get_id(); +} + +void ThreadPool::ChangeThreadPoolSize(size_t new_max_num_threads) +{ + std::unique_lock lock(threadsMutex); + + //don't need to change anything + if(new_max_num_threads == threads.size()) + return; + + //if reducing thread count, clean up all jobs and clear out all threads + if(new_max_num_threads < threads.size()) + { + ShutdownAllThreads(); + threads.clear(); + + //no longer shutting down, allow to build up threads + shutdownThreads = false; + } + + size_t num_new_threads = new_max_num_threads - threads.size(); + + //place an empty idle task for each thread waiting for work + for(size_t i = 0; i < num_new_threads; i++) + { + threads.emplace_back( + [this] + { + //infinite loop waiting for work + for(;;) + { + //container for the task + std::function task; + + //fetch from queue + { + std::unique_lock lock(taskQueueMutex); + + //if no more work, wait until shutdown or more work + if(taskQueue.empty()) + { + //wait until either shutting down or more work has been added + waitForTask.wait(lock, + [this] { return shutdownThreads || !taskQueue.empty(); }); + + //only can make it here if shutting down (otherwise taskQueue has something in it) + if(shutdownThreads) + return; + } + + //take ownership of the task so it can be destructed when complete + // (won't increment shared_ptr counter) + task = std::move(taskQueue.front()); + taskQueue.pop(); + + //count the thread as active before releasing the lock + numActiveThreads++; + } + + task(); + numActiveThreads--; + } + } + ); + } + + //notify all just in case a new task was added as the threads were being created + // but unlock to allow threads to proceed + lock.unlock(); + waitForTask.notify_all(); +} + +ThreadPool::~ThreadPool() +{ + ShutdownAllThreads(); +} + +void ThreadPool::ShutdownAllThreads() +{ + //initiate shutdown + { + std::unique_lock lock(taskQueueMutex); + shutdownThreads = true; + } + + //join all threads + waitForTask.notify_all(); + for(std::thread &worker : threads) + worker.join(); +} diff --git a/src/Amalgam/ThreadPool.h b/src/Amalgam/ThreadPool.h new file mode 100644 index 00000000..eeec94ed --- /dev/null +++ b/src/Amalgam/ThreadPool.h @@ -0,0 +1,220 @@ +#pragma once + +//system headers: +#include +#include +#include +#include +#include +#include +#include +#include +#include + 
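For orientation, a minimal sketch of how this pool's task API is driven, mirroring the batch pattern used by AddLabels earlier in this diff; the engine normally goes through the shared Concurrency::threadPool rather than constructing a pool directly, and main() with its lambdas below is illustrative only.

#include <cstdio>
#include <future>
#include <vector>
#include "ThreadPool.h"

int main()
{
	ThreadPool pool(4); //pool with four worker threads

	//single task: returns a std::future for the callable's result
	std::future<int> answer = pool.EnqueueSingleTask([](int x) { return x * 2; }, 21);
	std::printf("single task result: %d\n", answer.get());

	//batch of tasks: enqueue while holding the batch lock, release it, then wait on the futures
	auto enqueue_task_lock = pool.BeginEnqueueBatchTask();
	if(enqueue_task_lock.AreThreadsAvailable())
	{
		std::vector<std::future<void>> tasks_completed;
		for(int i = 0; i < 8; i++)
			tasks_completed.emplace_back(pool.EnqueueBatchTask([i]() { std::printf("batch task %d\n", i); }));

		enqueue_task_lock.Unlock();
		pool.CountCurrentThreadAsPaused();

		for(auto &task : tasks_completed)
			task.wait();

		pool.CountCurrentThreadAsResumed();
	}

	return 0;
}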
+//Creates a flexible thread pool for generic tasks +class ThreadPool +{ +public: + ThreadPool(size_t max_num_threads = 0); + + //will change the number of threads in the pool to the number specified + void ChangeThreadPoolSize(size_t new_max_num_threads); + + //returns the number of threads that are performing tasks + inline size_t GetNumActiveThreads() + { + return numActiveThreads; + } + + //returns a vector of the thread ids for the thread pool + inline std::vector GetThreadIds() + { + std::vector thread_ids; + thread_ids.reserve(threads.size() + 1); + thread_ids.push_back(mainThreadId); + for(std::thread &worker : threads) + thread_ids.push_back(worker.get_id()); + return thread_ids; + } + + //destroys all the threads and waits to join them + ~ThreadPool(); + + //enqueues a task into the thread pool comprised of a function and arguments, automatically inferring the function type + template + std::future::type> EnqueueSingleTask(FunctionType &&function, ArgsType &&...args) + { + using return_type = typename std::invoke_result::type; + + //create a shared pointer of the task, as we don't know which could happen first, either + // this function will return and the thread will free the memory, or the thread could return really fast + // and this function will need to clean up the memory, but both need a valid reference + auto task = std::make_shared< std::packaged_task >( + std::bind(std::forward(function), std::forward(args) ...) + ); + + //hold the future to return + std::future result = task->get_future(); + + //put the task on the queue + { + std::unique_lock lock(taskQueueMutex); + taskQueue.emplace( + [task]() + { + (*task)(); + } + ); + } + waitForTask.notify_one(); + + return result; + } + + //Contains a lock for the task queue for calling EnqueueBatchTask repeatedly while maintaining the lock and layer count + struct BatchTaskEnqueueLockAndLayer + { + inline BatchTaskEnqueueLockAndLayer(std::condition_variable *wait_for_task, std::mutex &task_queue_mutex) + { + waitForTask = wait_for_task; + lock = std::unique_lock(task_queue_mutex); + } + + //move constructor to allow a function to build and return the lock + inline BatchTaskEnqueueLockAndLayer(BatchTaskEnqueueLockAndLayer &&other) + : waitForTask(std::move(other.waitForTask)), lock(std::move(other.lock)) + { + //mark the lock as invalid so the other's destructor doesn't try to unlock an invalid lock + other.waitForTask = nullptr; + } + + //move assignment + inline BatchTaskEnqueueLockAndLayer &operator =(BatchTaskEnqueueLockAndLayer &&other) + { + std::swap(waitForTask, other.waitForTask); + std::swap(lock, other.lock); + return *this; + } + + //unlocks the lock if locked and invalidates the knowledge of any threads being available + inline void Unlock() + { + //if waitForTask is not nullptr, then the lock must be locked + // otherwise just let the lock destructor clean up + if(waitForTask != nullptr) + { + lock.unlock(); + waitForTask->notify_all(); + waitForTask = nullptr; + } + } + + inline ~BatchTaskEnqueueLockAndLayer() + { + Unlock(); + } + + //returns true if there's available threads as denoted by a proper way to notify the threads + constexpr bool AreThreadsAvailable() + { + return (waitForTask != nullptr); + } + + //marks as there aren't threads available + constexpr void MarkAsNoThreadsAvailable() + { + waitForTask = nullptr; + } + + //used to notify threads when enqueueing is done + // this is marked as nullptr if there aren't available threads + std::condition_variable *waitForTask; + + //lock for enqueueing tasks 
+ std::unique_lock lock; + }; + + //attempts to begin a batch of tasks of num_tasks + //if fail_unless_task_queue_availability is true and there are backlogged tasks, + // then it will not begin the task batch and return false; this is useful for preventing deadlock + // when attempting to enqueue tasks which are subtasks of other tasks + BatchTaskEnqueueLockAndLayer BeginEnqueueBatchTask(bool fail_unless_task_queue_availability = true) + { + BatchTaskEnqueueLockAndLayer btel(&waitForTask, taskQueueMutex); + + if(fail_unless_task_queue_availability) + { + //need to make sure there's at least one extra thread available to make sure that this batch of tasks can be run + // in case there are any interdependencies, in order to prevent deadlock + if(taskQueue.size() + numActiveThreads >= threads.size()) + btel.MarkAsNoThreadsAvailable(); + } + + return btel; + } + + //enqueues a task into the thread pool comprised of a function and arguments, automatically inferring the function type + template + std::future::type> EnqueueBatchTask(FunctionType &&function, ArgsType &&...args) + { + using return_type = typename std::invoke_result::type; + + //create a shared pointer of the task, as we don't know which could happen first, either + // this function will return and the thread will free the memory, or the thread could return really fast + // and this function will need to clean up the memory, but both need a valid reference + auto task = std::make_shared< std::packaged_task >( + std::bind(std::forward(function), std::forward(args) ...) + ); + + //hold the future to return + std::future result = task->get_future(); + + //put the task on the queue + taskQueue.emplace( + [task]() + { + (*task)(); + } + ); + + return result; + } + + //if a thread will be sitting waiting for other threads to complete, it can mark itself as inactive + // but it should call ResumeCurrentThread once ready again + inline void CountCurrentThreadAsPaused() + { + numActiveThreads--; + } + + //if a thread will be sitting waiting for other threads to complete, it can mark itself as inactive via PauseCurrentThread + // and should call ResumeCurrentThread once ready again + inline void CountCurrentThreadAsResumed() + { + numActiveThreads++; + } + +private: + //waits for all threads to complete, then shuts them down + void ShutdownAllThreads(); + + //the thread pool + std::mutex threadsMutex; + std::vector threads; + + //lock to notify threads when to start work + std::condition_variable waitForTask; + + //if true, then all threads should end work so they can be joined + bool shutdownThreads; + + //tasks for the threadpool to complete + std::mutex taskQueueMutex; + std::queue> taskQueue; + + //number of threads running + std::atomic numActiveThreads; + + //id of the main thread + std::thread::id mainThreadId; +}; diff --git a/src/Amalgam/amlg_code/full_test.amlg b/src/Amalgam/amlg_code/full_test.amlg new file mode 100644 index 00000000..70572db2 --- /dev/null +++ b/src/Amalgam/amlg_code/full_test.amlg @@ -0,0 +1,4003 @@ +;Full test +;This is a suite of unit tests. +; This is the second line of the unit test description. 
+(seq + (print "--Amalgam Version--\n") + (print (system "version") "\n") + + (print "--system_time--\n") + (declare (assoc start_time (system_time))) + + (print "--label--\n") + ;label test + (#label1 print "hello world: " (* #label-number-2 3 4) #label3 " and " (* 1 2) "\n") + + (#"label 1" print "hello world: " (* #"label number 2" 3 4) #"label 3" " and " (* 1 2) "\n") + + (print "--non-keyword token--\n") + (print (call (parse "(6)")) "\n") + (print (call (parse "(notakeyword)")) "\n") + + (print "--get_defaults--\n") + (print (get_defaults "mutation_opcodes")) + (print (get_defaults "mutation_types")) + + (print "--parse and unparse--\n") + (print (unparse (parse "(print \"hello\")")) "\n") + (print (parse (unparse (list (sqrt -1) .nan .infinity -.infinity))) "\n") + + (print (unparse (associate "a" 1 "b" 2 "c" (list "alpha" "beta" "gamma"))) "\n") + (print (unparse (associate "a" 1 "b" 2 "c" (list "alpha" "beta" "gamma")) (true)) "\n") + + (print "--if--\n") + (if 1 (print "if 1\n")) + (if 0 (print "if 2 error\n") (print "if 2\n")) + (if (null) (print "1\n") + 0 (print "2\n") + 0 (print "3\n") + (print "4\n") + ) + + (print "--seq--\n") + (seq (print 1 "\n") (print 2 "\n") (print 3 "\n")) + + (print "--lambda and call--\n") + (declare (assoc foo (lambda + (declare (assoc x 6) + (+ x 2) + ) + ))) + (print foo) + (print (call foo (assoc x 3)) "\n") + + (print (lambda (lambda (+ 1 2)) (true) )) + + (print "--call_sandboxed--\n") + (print (call_sandboxed (lambda (+ y 4)) (assoc y 3)) "\n") + (print (call_sandboxed (lambda (+ y x 4)) (assoc y 3)) "\n") + + (print "--while--\n") + (assign (assoc zz 1)) + (while (< zz 10) + (print zz "\n") + (assign (assoc zz (+ zz 1))) + ) + + (print "--conclude--\n") + + (print (seq + (print "seq1 ") + (conclude "success") + (print "seq2") + ) "\n") + + (print (while (< 1 100) + (print "while1 ") + (conclude "success") + (print "while2") + ) "\n") + + (print (let (assoc a 1) + (print "let1 ") + (conclude "success") + (print "let2") + ) "\n") + + (print (declare (assoc abcdefghijklmnop 1) + (print "declare1 ") + (conclude "success") + (print "declare2") + ) "\n") + + (seq + + (print 1) + + (declare + (assoc) + (while 1 + (if (true) (conclude)) + ) + (print 4) + ) + (print "2\n") +) + + (print "--declare--\n") + (declare (assoc x 7)) + (print x "\n") + (declare (assoc x 4)) + (print x "\n") + + (print "--let--\n") + (let (assoc x 4 y 6) (print (+ x y) "\n")) + + (let (assoc x 4 y 6) + (declare (assoc x 5 z 1) + (print (+ x y z) "\n")) + ) + + (print "--assign--\n") + (assign (assoc x 10)) + (print x "\n") + + (assign "x" 20) + (print x "\n") + + (assign "x" (list 0 1 2 (associate "a" 1 "b" 2 "c" 3))) + (print x) + + (assign "x" (list 1) "not 1") + (print x) + + (assign "x" (list 3 "c") (list "c attribute") (list 3 "a") (list "a attribute")) + (print x) + + (print "--accum--\n") + (assign (assoc x 10)) + (print x "\n") + (accum (assoc x 1)) + (print x "\n") + + (declare (assoc + accum_string "abc" + accum_list (list 1 2 3) + accum_assoc (associate "a" 1 "b" 2) + )) + + (accum (assoc accum_string "def")) + (print accum_string "\n") + + (accum (assoc accum_list (list 4 5 6))) + (print accum_list "\n") + + (accum (assoc accum_list (associate "7" 8))) + (print accum_list "\n") + + (accum (assoc accum_assoc (associate "c" 3 "d" 4))) + (print accum_assoc "\n") + + (accum (assoc accum_assoc (list "e" 5))) + (print accum_assoc "\n") + + (assign "x" 1) + (accum "x" (null) 4) + (print x "\n") + + (assign "x" (list 0 1 2 (associate "a" 1 "b" 2 "c" 3))) + (accum "x" 
(list 1) 1) + (print x "\n") + + (print "--retrieve--\n") + (assign (assoc www 1)) + (print (retrieve "www") "\n") + + (assign (assoc rwww 1 raaa 2)) + (print (retrieve "rwww") "\n") + (print (retrieve (list "rwww" "raaa"))) + (print (retrieve (zip (list "rwww" "raaa") null))) + + (print "--assign--\n") + (declare + (assoc bar (lambda + (declare + (assoc x 6) + (+ x 2)) + ))) + (print (call foo (assoc y 3)) "\n") + + (assign (assoc x 12)) + (print x "\n") + (print (set (list 0 1 2 3 4) 2 10)) + (print (set (associate "a" 1 "b" 2) "a" 3)) + + (print "--+--\n") + (print (+ 1 2 3 4) "\n") + + (print "-----\n") + (print (- 1 2 3 4) "\n") + + (print (- 3) "\n") + + (print "--*--\n") + (print (* 1 2 3 4) "\n") + + (print "--/--\n") + (print (/ 1.0 2 3 4) "\n") + + (print "--mod--\n") + (print (mod 1 2 3 4) "\n") + + (print "--get_digits--\n") + + (print (get_digits 1234567.8 10)) + (print (get_digits 1234567.89 10)) + (print (get_digits 1234.5678 10 -1 -.infinity)) + (print (get_digits 7 2 .infinity 0)) + (print (get_digits 16 2 .infinity 0)) + (print (get_digits 24 4 .infinity 0)) + (print (get_digits 40 3 .infinity 0)) + (print (get_digits 16 2 .infinity 0)) + (print (get_digits 16 8 .infinity 0)) + (print (get_digits 3 2 5 0)) + (print (get_digits 1.5 1.5 .infinity 0)) + (print (get_digits 3.75 1.5 .infinity -7)) + + (print (get_digits 1234567.8 10 0 4 (false))) + (print (get_digits 1234567.8 10 4 8 (false))) + (print (get_digits 1.2345678e100 10 0 4 (false))) + (print (get_digits 1.2345678e100 10 4 8 (false))) + + ;should print empty list for these + (print (get_digits 0 2.714 1 3 (false))) + (print (get_digits 0 2.714 1 3 (true))) + (print (get_digits 0 10 0 10 (false))) + + ;4 followed by zeros + (print (get_digits 0.4 10 0 10 (false))) + + (print "--set_digits--\n") + + (print (set_digits 1234567.8 10 (list 5 5 5)) "\n") + (print (set_digits 1234567.8 10 (list 5 5 5) -1 -.infinity) "\n") + (print (set_digits 7 2 (list 1 0 0) .infinity 0) "\n") + (print (set_digits 1.5 1.5 (list 1) .infinity 0) "\n") + (print (set_digits 1.5 1.5 (list 2) .infinity 0) "\n") + (print (set_digits 1.5 1.5 (list 1 0) 1 0) "\n") + (print (set_digits 1234567.8 10 (list 5 5 5) 10) "\n") + (print (set_digits 1.5 1.5 (list 1 0 0) 2 0) "\n") + + (print (set_digits 1234567.8 10 (list 5 5 5 5 5) 0 4 (false)) "\n") + (print (set_digits 1234567.8 10 (list 5 5 5 5 5) 4 8 (false)) "\n") + (print (set_digits 1.2345678e100 10 (list 5 5 5 5 5) 0 4 (false)) "\n") + (print (set_digits 1.2345678e100 10 (list 5 5 5 5 5) 4 8 (false)) "\n") + + ;these should all print (list 1 0 1) + (print (get_digits (set_digits 1234567.8 10 (list 1 0 1 0) 2 5 (false)) 10 2 5 (false))) + (print (get_digits (set_digits 1234567.8 2 (list 1 0 1 0) 2 5 (false)) 2 2 5 (false))) + (print (get_digits (set_digits 1234567.8 3.1 (list 1 0 1 0) 2 5 (false)) 3.1 2 5 (false))) + + (print "--floor--\n") + (print (floor 1.5) "\n") + + (print "--ceil--\n") + (print (ceil 1.5) "\n") + + (print "--round--\n") + (print (round 12.7) "\n") + (print (round 12.7 1) "\n") + (print (round 123.45678 5) "\n") + (print (round 123.45678 2) "\n") + (print (round 123.45678 2 2) "\n") + (print (round 123.45678 6 2) "\n") + (print (round 123.45678 4 0) "\n") + (print (round 123.45678 0 0) "\n") + (print (round 1.2345678 2 4) "\n") + (print (round 1.2345678 0 4) "\n") + (print (round 0.012345678 2 4) "\n") + (print (round 0.012345678 4 2) "\n") + (print (round 0.012345678 0 0) "\n") + (print (round 0.012345678 100 100) "\n") + + (print (round 0.6 2) "\n") + (print (round 0.6 32 2) 
"\n") + (print (round (/ 1 3) 32 1) "\n") + + (print "--exp--\n") + (print (exp 0.5) "\n") + + (print "--log--\n") + (print (log 0.5) "\n") + (print (log 16 2) "\n") + + (print "--sin--\n") + (print (sin 0.5) "\n") + + (print "--asin--\n") + (print (asin 0.5) "\n") + + (print "--cos--\n") + (print (cos 0.5) "\n") + + (print "--acos--\n") + (print (acos 0.5) "\n") + + (print "--tan--\n") + (print (tan 0.5) "\n") + + (print "--atan--\n") + (print (atan 0.5) "\n") + (print (atan 0.5 0.5) "\n") + + (print "--sinh--\n") + (print (sinh 0.5) "\n") + + (print "--asinh--\n") + (print (asinh 0.5) "\n") + + (print "--cosh--\n") + (print (cosh 0.5) "\n") + + (print "--acosh--\n") + (print (acosh 0.5) "\n") + + (print "--tanh--\n") + (print (tanh 0.5) "\n") + + (print "--atanh--\n") + (print (atanh 0.5) "\n") + + (print "--erf--\n") + (print (erf 0.5) "\n") + + (print "--tgamma--\n") + (print (tgamma 0.5) "\n") + + (print "--lgamma--\n") + (print (lgamma 0.5) "\n") + + (print "--sqrt--\n") + (print (sqrt 0.5) "\n") + + (print "--pow--\n") + (print (pow 0.5 2) "\n") + + (print "--abs--\n") + (print (abs -0.5) "\n") + + (print "--max--\n") + (print (max 0.5 1 7 9 -5) "\n") + (print (max (null) 4 8) "\n") + (print (max (null)) "\n") + + (print "--min--\n") + (print (min 0.5 1 7 9 -5) "\n") + (print (min (null) 4 8) "\n") + + (print "--dot_product--\n") + (print (dot_product (list 0.5 0.25 0.25) (list 4 8 8)) "\n") + (print (dot_product (associate "a" 0.5 "b" 0.25 "c" 0.25) (associate "a" 4 "b" 8 "c" 8)) "\n") + + (print (dot_product (associate "0" 0.5 "1" 0.25 "2" 0.25) (list 4 8 8)) "\n") + + (print "--generalized_distance--\n") + (print " 0 " (generalized_distance (null) (null) (null) (null) 0.01 (map 10000 (range 0 200)) ) "\n") + (print " 1 " (generalized_distance (null) (null) (null) (null) 0.01 (list 1 2 3) (list 0 2 3) ) "\n") + (print " 2 " (generalized_distance (null) (null) (null) (null) 2 (list 3 4) ) "\n") + (print " 3 " (generalized_distance (null) (null) (null) (null) -.infinity (list 3 4) ) "\n") + (print " 4 " (generalized_distance (list 0.3333 0.3333 0.3333) (null) (null) (null) 0.01 (list 1 2 3) (list 0 2 3) ) "\n") + (print " 5 " (generalized_distance (list 1 1) (null) (null) (null) 2 (list 3 4) ) "\n") + (print " 6 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 2 (list 3 4) ) "\n") + (print " 7 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 1 (list 3 4) ) "\n") + (print " 8 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0.5 (list 3 4) ) "\n") + (print " 9 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0.1 (list 3 4) ) "\n") + (print "10 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0.01 (list 3 4) ) "\n") + (print "11 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0.001 (list 3 4) ) "\n") + (print "12 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0 (list 3 4) ) "\n") + (print "13 " (generalized_distance (list 1 1) (null) (null) (null) 2 (list .nan 4) ) "\n") + (print "14 " (generalized_distance (list 1 1) (null) (null) (null) 0 (list .nan 4) ) "\n") + (print "15 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 2 (list .nan 4) ) "\n") + (print "16 " (generalized_distance (list 0.5 0.5) (null) (null) (null) 0 (list .nan 4) ) "\n") + (print "17 " (generalized_distance (null) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 4) ) "\n") + (print "18 " (generalized_distance (null) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "19 " 
(generalized_distance (null) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "20 " (generalized_distance (list 0.3333 0.3333 0.3333) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 4) ) "\n") + (print "21 " (generalized_distance (list 0.3333 0.3333 0.3333) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "22 " (generalized_distance (list 0.3333 0.3333 0.3333) (list "nominal") (list 1) (null) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "23 " (generalized_distance (list 0.3333 0.3333 0.3333) (list "nominal" "cyclic" "cyclic") (list 1 360 12) (null) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "24 " (generalized_distance (list 0.3333 0.3333 0.3333) (list "nominal") (list 1) (list 0.25 180 -12) 1 (list 1 2 3) (list 10 2 10) ) "\n") + (print "25 " (generalized_distance (list 1 0 1 ) (list "continuous" "nominal" "nominal") (list (null) 5 5) (list .1 .1 .1 ) 2 (list 4 4 (null) ) (list 2 (null) (null) ) ) "\n" ) + (print "26 " (generalized_distance (list 1 0 1 ) (list "continuous" "nominal" "nominal") (list (null) 5 5) (null) 2 (list 4 4 (null) ) (list 2 (null) (null) ) ) "\n") + (print "27 " (generalized_distance (list 1 0 1 1 ) (list "continuous" "nominal" "nominal") (list (null) 5 5) (list .1 .1 .1 .1) 2 (list 4 4 (null) 4) (list 2 (null) (null) 2) ) "\n" ) + (print "28 " (generalized_distance (list 1 0 1 1 ) (list "continuous" "nominal" "nominal") (list (null) 5 5) (null) 2 (list 4 4 (null) 4) (list 2 (null) (null) 2) ) "\n" ) + (print "29 " (generalized_distance (list 1 0 1 1 1) (null) (null) (null) 1 (list 4 4 4 4 4) (list 2 (null) 2 2 2) ) "\n" ) + (print "30 " (generalized_distance (assoc x 1 y 1 z 1) + (assoc y "continuous" x "nominal" z "continuous") + (assoc z 5) + (null) + 1 + (list 4 4 4 ) + (list 2 2 2 ) + (null) + (list "x" "y" "z") ) "\n" ) + ;should print 4 + (print "31 " (generalized_distance (list 1 1 1) (list "continuous" "nominal" "nominal") (list (null) 5 5) (null) 1 (list 4 4 (null)) (list 2 2 (null))) "\n") + ;should print 4 + (print "32 " (generalized_distance (list 1 1 1 1) (list "continuous" "nominal" "nominal" "continuous") (list 2 5 5 2) (null) 0 (list 4 4 4 4) (list 2 2 2 (null))) "\n") + + ;should print 4 + (print "33 " (generalized_distance (list 1 1 1 1) (list "continuous" "nominal" "nominal" "continuous") (list 1 5 5 1) (null) 1 (list 4 "s" "s" 4) (list 2 "s" 2 (null))) "\n") + + ;should print 2 + (print "34 " (generalized_distance (list 1 1) (list "code" "nominal") (list 0 5) (null) 1 (list (list 1 2 3 4 5) "s") (list (list 1 2 3) "s") ) "\n") + + ;should print 3ish + (print "35 " (generalized_distance (list 1 1) (list "code" "nominal") (list 0 5) (null) 1 (list (list 1.5 2 3 4 5) "s") (list (list 1 2 3) "s") ) "\n") + + (print "--entropy--\n") + (print (entropy (list 0.5 0.5)) "\n") + (print (entropy (list 0.5 0.5) (list 0.25 0.75) -1 1) "\n") + (print (entropy (list 0.5 0.5) (list 0.25 0.75)) "\n") + (print (entropy 0.5 (list 0.25 0.75) -1 1) "\n") + (print (entropy 0.5 (list 0.25 0.75) 0 1) "\n") + (print (entropy (assoc A 0.5 B 0.5) (assoc B 0.25 A 0.75)) "\n") + + (print "--first--\n") + (print (first (list 4 9.2 "this")) "\n") + (print (first (associate "a" 1 "b" 2)) "\n") + (print (first 3) "\n") + (print (first 0) "\n") + (print (first "abc") "\n") + (print (first "") "\n") + + (print "--tail--\n") + (print (tail (list 4 9.2 "this"))) + (print (tail (list 1 2 3 4 5 6))) + (print (tail (list 1 2 3 4 5 6) 2)) + (print (tail (list 1 2 3 4 5 6) -2)) + (print (tail (list 1 2 3 4 5 
6) -6)) + (print (tail (list 1 2 3 4 5 6) 6)) + (print (tail (list 1 2 3 4 5 6) 10)) + (print (tail (list 1 2 3 4 5 6) -10)) + (print (tail (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) )) + (print (tail (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) 2 )) + (print (tail (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) -2)) + (print (tail (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) 10)) + (print (tail (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) -10)) + (print (tail 3) "\n") + (print (tail 0) "\n") + (print (tail "abcdef") "\n") + (print (tail "abcdef" 2) "\n") + (print (tail "abcdef" -2) "\n") + (print (tail "abcdef" 6) "\n") + (print (tail "abcdef" -6) "\n") + (print (tail "abcdef" 10) "\n") + (print (tail "abcdef" -10) "\n") + (print (tail "") "\n") + + (print "--last--\n") + (print (last (list 4 9.2 "this")) "\n") + (print (last (associate "a" 1 "b" 2)) "\n") + (print (last 3) "\n") + (print (last 0) "\n") + (print (last "abc") "\n") + (print (last "") "\n") + + (print "--trunc--\n") + (print (trunc (list 4 9.2 "end"))) + (print (trunc (list 1 2 3 4 5 6))) + (print (trunc (list 1 2 3 4 5 6) 2)) + (print (trunc (list 1 2 3 4 5 6) -2)) + (print (trunc (list 1 2 3 4 5 6) -6)) + (print (trunc (list 1 2 3 4 5 6) 6)) + (print (trunc (list 1 2 3 4 5 6) 10)) + (print (trunc (list 1 2 3 4 5 6) -10)) + (print (trunc (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) )) + (print (trunc (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) 2 )) + (print (trunc (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) -2)) + (print (trunc (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) 10)) + (print (trunc (associate "a" 1 "b" 2 "c" 3 "d" 4 "e" 5 "f" 6) -10)) + (print (trunc 3) "\n") + (print (trunc 0) "\n") + (print (trunc "abcdef") "\n") + (print (trunc "abcdef" 2) "\n") + (print (trunc "abcdef" -2) "\n") + (print (trunc "abcdef" 6) "\n") + (print (trunc "abcdef" -6) "\n") + (print (trunc "abcdef" 10) "\n") + (print (trunc "abcdef" -10) "\n") + (print (trunc "") "\n") + + (print "--append--\n") + (print (append (list 1 2 3) (list 4 5 6) (list 7 8 9))) + (print (append (list 1 2 3) (associate "a" 4 "b" 5 "c" 6) (list 7 8 9) (associate "d" 10 "e" 11))) + (print (append (list 4 9.2 "this") "end")) + (print (append (associate 0 4 1 9.2 2 "this") "end")) + + (print "--size--\n") + (print (size (list 4 9.2 "this")) "\n") + (print (size (associate "a" 1 "b" 2 "c" 3 4 "d")) "\n") + (print (size "hello") "\n") + + (print "--range--\n") + (print (range 0 10)) + (print (range 10 0)) + (print (range 0 5 0)) + (print (range 0 5 1)) + (print (range 12 0 5 1)) + + (print (range + (lambda (+ (target_index) 1)) + 0 5 1 + )) + + (print ||(range + (lambda (+ (target_index) 1)) + 0 5 1 + )) + + (print "--replace--\n") + (print (replace (list (associate "a" 13)) )) + + (print (replace + (list (associate "a" 1)) + (list 2) 1 + (list 0) (list 4 5 6))) + +(print (replace + (list (associate "a" 1)) + 2 1 + 0 (list 4 5 6))) + + + (print (replace + (list (associate "a" 1)) + (list 0) (lambda (set (target_value) "b" 2)) + )) + + (print "--rewrite--\n") + (print (rewrite + (lambda (if (~ (target_value) 0) (+ (target_value) 1) (target_value)) ) + (list (associate "a" 13)) )) + + ;rewrite all integer additions into multiplies and then fold constants +(print (rewrite + (lambda + ;find any nodes with a + and where its list is filled to its size with integers + (if (and + (= (get_type (target_value)) (lambda (+))) + (= (size (target_value)) (size (filter (lambda (~ (target_value) 0)) (target_value))) ) + ) + (reduce (lambda (* (target_value 
1) (target_value)) ) (target_value)) + (target_value) + ) + ) + ;original code with additions to be rewritten + (lambda + (list (associate "a" (+ 3 (+ 13 4 2)) )) ) + ) ) + + ;rewrite numbers as sums of position in the list and the number (all 8s) +(print (rewrite + (lambda + ;find any nodes with a + and where its list is filled to its size with integers + (if + (= (get_type_string (target_value)) "number") + + (+ (target_value) (get_value (target_index))) + + (target_value) + ) + ) + ;original code with additions to be rewritten + (lambda + (list 8 7 6 5 4 3 2 1 0) ) + ) ) + +(print (rewrite + (lambda + (if (and + (= (get_type (target_value)) (lambda (+))) + (= (size (target_value)) (size (filter (lambda (~ (target_value) 0)) (target_value))) ) + ) + (reduce (lambda (+ (target_value 1) (target_value)) ) (target_value)) + (target_value) + ) + ) + (lambda + (+ (+ 13 4) a) ) ) + ) + + (print "--map--\n") + (print (map (lambda (* (target_value) 2)) (list 1 2 3 4))) + (print (map (lambda (+ (target_value) (target_index) )) (list 10 1 20 2 30 3 40 4))) + (print (map (lambda (+ (target_value) (target_index) )) (associate 10 1 20 2 30 3 40 4))) + + (print (map + (lambda + (+ (get (target_value) 0) (get (target_value) 1)) + ) + (list 1 2 3 4 5 6) + (list 2 2 2 2 2 2) + )) + + + (print (map + (lambda + (+ (get (target_value) 0) (get (target_value) 1)) + ) + (list 1 2 3 4 5) + (list 2 2 2 2 2 2) + )) + + (print (map + (lambda + (+ (get (target_value) 0) (get (target_value) 1) (get (target_value) 2)) + ) + (associate "0" 0 "1" 1 "a" 3) + (associate "0" 1 "a" 4) + (list 2 2 2 2) + )) + + (print "--filter--\n") + (print (filter (lambda (> (target_value) 2)) (list 1 2 3 4))) + (print (filter (lambda (< (target_index) 3)) (list 10 1 20 2 30 3 40 4))) + (print (filter (lambda (< (target_index) 20)) (associate 10 1 20 2 30 3 40 4))) + + (print (filter (list 10 1 20 (null) 30 .nan .nas 40 4))) + (print (filter (list 10 1 20 (null) 30 "" 40 4))) + (print (filter (assoc a 10 b 1 c 20 d "" e 30 f 3 g (null) h 4))) + (print (filter (assoc a 10 b 1 c 20 d "" e 30 f 3 g (null) h 4))) + + (print "--weave--\n") + + (print (weave (list 1 2 3)) "\n") + (print (weave (list 1 3 5) (list 2 4 6)) "\n") + (print (weave (null) (list 2 4 6) (null) ) "\n") + (print (weave "a" (list 2 4 6)) "\n") + (print (weave (null) (list 1 4 7) (list 2 5 8) (list 3 6 9)) "\n") + + (print (weave (list 1 3 5 7 9 11) (list 2 4 6 8 10 12)) "\n") + + (print (weave + (lambda + (target_value) + ) + (list 1 3 5 7 9 11) + (list 2 4 6 8 10 12) + )"\n") + + (print (weave + (lambda + (map (lambda + (* 2 (target_value)) + ) + (target_value) + ) + ) + (list 1 3 5 7 9 11) + (list 2 4 6 8 10 12) + )"\n") + + (print (weave + (lambda + (list (apply "min" (target_value 1)) ) + ) + (list 1 3 4 5 5 6) + (list 2 2 3 4 6 7) + )"\n") + + (print (weave + (lambda + (if (<= (get (target_value) 0) 4) + (list (apply "min" (target_value 1)) ) + (target_value) + ) + ) + (list 1 3 4 5 5 6) + (list 2 2 3 4 6 7) + )"\n") + + (print + (weave + (lambda + (if (>= (first (target_value)) 3) + (list (first (target_value 1))) + (list) + ) + ) + (list 1 2 3 4 5) + (null) + ) + ) + + (print "--reduce--\n") + (print (reduce (lambda (* (target_value) (target_value 1))) (list 1 2 3 4)) "\n") + (print (reduce (lambda (* (target_value) (target_value 1))) (associate "a" 1 "b" 2 "c" 3 "d" 4)) "\n") + + (print "--apply--\n") + (print (apply (lambda (+)) (list 1 2 3 4)) "\n") + (print (apply (lambda (+ 5)) (list 1 2 3 4)) "\n") + (print (apply "+" (list 1 2 3 4)) "\n") + + (print 
"--reverse--\n") + (print (reverse (list 1 2 3 4 5))) + + (print "--sort--\n") + (print (sort (list 4 9 3 5 1))) + (print (sort (list "n" "b" "hello" "soy" 4 1 3.2 (list 1 2 3)))) + (print (sort (list 1 "1x" "10" 20 "z2" "z10" "z100"))) + (print (sort (list 1 "001x" "010" 20 "z002" "z010" "z100"))) + (print (sort (lambda (- (target_value) (target_value 1))) (list 4 9 3 5 1))) + (print (sort (lambda (- (rand) (rand)) ) (range 0 10) )) + + (print (sort (list + "2020-06-08 lunes 11.33.36" + "2020-06-08 lunes 11.32.47" + "2020-06-08 lunes 11.32.49" + "2020-06-08 lunes 11.32.37" + "2020-06-08 lunes 11.33.48" + "2020-06-08 lunes 11.33.40" + "2020-06-08 lunes 11.33.45" + "2020-06-08 lunes 11.33.42" + "2020-06-08 lunes 11.33.47" + "2020-06-08 lunes 11.33.43" + "2020-06-08 lunes 11.33.38" + "2020-06-08 lunes 11.33.39" + "2020-06-08 lunes 11.32.36" + "2020-06-08 lunes 11.32.38" + "2020-06-08 lunes 11.33.37" + "2020-06-08 lunes 11.32.58" + "2020-06-08 lunes 11.33.44" + "2020-06-08 lunes 11.32.48" + "2020-06-08 lunes 11.32.46" + "2020-06-08 lunes 11.32.57" + "2020-06-08 lunes 11.33.41" + "2020-06-08 lunes 11.32.39" + "2020-06-08 lunes 11.32.59" + "2020-06-08 lunes 11.32.56" + "2020-06-08 lunes 11.33.46" + ))) + + (print "--indices--\n") + (print (indices (associate "a" 1 "b" 2 "c" 3 4 "d"))) + (print (indices (list "a" 1 "b" 2 "c" 3 4 "d"))) + + (print "--values--\n") + (print (values (associate "a" 1 "b" 2 "c" 3 4 "d"))) + (print (values (list "a" 1 "b" 2 "c" 3 4 "d"))) + + (print (values (list "a" 1 "b" 2 "c" 3 4 "d" 1 2 3 4 "a" "b" "c") (true))) + (print (values (associate "a" 1 "b" 2 "c" 3 4 "d" "e" 1") (true))) + + (print (values (append (range 1 20) (range 1 20)) (true))) + + (print "--contains_index--\n") + (print (contains_index (associate "a" 1 "b" 2 "c" 3 4 "d") "c") "\n") + (print (contains_index (associate "a" 1 "b" 2 "c" 3 4 "d") "m") "\n") + (print (contains_index (list "a" 1 "b" 2 "c" 3 4 "d") 2) "\n") + (print (contains_index (list "a" 1 "b" 2 "c" 3 4 "d") 100) "\n") + + (print "--contains_value--\n") + (print (contains_value (associate "a" 1 "b" 2 "c" 3 4 "d") 1) "\n") + (print (contains_value (associate "a" 1 "b" 2 "c" 3 4 "d") 44) "\n") + (print (contains_value (list "a" 1 "b" 2 "c" 3 4 "d") "d") "\n") + (print (contains_value (list "a" 1 "b" 2 "c" 3 4 "d") 100) "\n") + + (print (contains_value "hello world" ".*world")) + (print (contains_value "abcdefg" "a.*g")) + (print (contains_value "3.141" "[0-9]+\\.[0-9]+")) + (print (contains_value "3.141" "\\d+\\.\\d+")) + (print (contains_value "3.a141" "\\d+\\.\\d+")) + (print (contains_value "abc\r\n123" "(.|\r)*\n.*")) + + (print "--remove--\n") + (print (remove (associate "a" 1 "b" 2 "c" 3 4 "d") 4)) + (print (remove (list "a" 1 "b" 2 "c" 3 4 "d") 4)) + + (print (remove (associate "a" 1 "b" 2 "c" 3 4 "d") (list 4 "a") )) + (print (remove (list "a" 1 "b" 2 "c" 3 4 "d") (list 4 "a") )) + + (print (remove (list 0 1 2 3 4 5) (list 0 2) )) + (print (remove (list 0 1 2 3 4 5) -1 )) + (print (remove (list 0 1 2 3 4 5) (list 0 -1) )) + (print (remove (list 0 1 2 3 4 5) (list 5 0 1 2 3 4 5 6) )) + + (print "--keep--\n") + (print (keep (associate "a" 1 "b" 2 "c" 3 4 "d") 4)) + (print (keep (list "a" 1 "b" 2 "c" 3 4 "d") 4)) + + (print (keep (associate "a" 1 "b" 2 "c" 3 4 "d") (list 4 "a") )) + (print (keep (list "a" 1 "b" 2 "c" 3 4 "d") (list 4 "a") )) + + (print (keep (list 0 1 2 3 4 5) (list 0 2) )) + (print (keep (list 0 1 2 3 4 5) -1 )) + (print (keep (list 0 1 2 3 4 5) (list 0 -1) )) + (print (keep (list 0 1 2 3 4 5) (list 5 0 1 2 3 4 5 6) 
)) + + (print "--zip--\n") + (print (zip (list "a" "b" "c" "d") (list 1 2 3 4))) + (print (zip (list "a" "b" "c" "d") )) + (print (zip (list "a" "b" "c" "d") 3)) + (print (zip (lambda (target_value)) (list "a" "b" "c" "d" "a") (list 1 2 3 4 4))) + (print (zip (lambda (+ (target_value 1) (target_value))) (list "a" "b" "c" "d" "a") (list 1 2 3 4 4))) + (print (zip (lambda (+ (target_value 1) (target_value))) (list "a" "b" "c" "d" "a") 1)) + + (print "--unzip--\n") + (print (unzip (associate "a" 1 "b" 2 "c" 3) (list "a" "b"))) + (print (unzip (list 1 2 3) (list 0 -1 1))) + + (print "--get--\n") + (print (get (list 4 9.2 "this"))) + (print (get (list 4 9.2 "this") 1) "\n") + (print (get (associate "a" 1 "b" 2 "c" 3 4 "d") "c") "\n") + (print (get + (list 0 1 2 3 (list 0 1 2 (associate "a" 1))) + (list 4 3 "a") + ) "\n") + + (print (get (list 4 9.2 "this") 1 2) "\n") + + (declare (assoc + get_test_assoc + (assoc + "A" (associate "B" 2 ) + "B" 2 + ) + )) + + (print "2: " (get get_test_assoc (list "A" "B")) "\n") + (print "(null): " (get get_test_assoc (list "A" "C")) "\n") + (print "(null): " (get get_test_assoc (list "B" "C")) "\n") + + (print (get (assoc .nas 3) .nas) "\n") + + (print "--set--\n") + (print (set (associate "a" 1 "b" 2 "c" 3 4 "d") "e" 5)) + (print (set (list 0 1 2 3 4) 2 10)) + (print (set (associate "a" 1 "b" 2) "a" 3)) + + (print "--target--\n") + (list 1 2 3 (print (target)) 4) + + (print "--target_index--\n") + (list 1 2 3 (print (target_index) "\n") 4) + (list 1 2 3 (print (target_index 1) "\n") 4) + (list 1 2 3 (print (target_index 2) "\n") 4) + (list 1 2 3 (print (target_index 3) "\n") 4) + + (print "--target_value--\n") + (list 1 2 3 (print (target_value) "\n") 4) + + (print "--stack--\n") + (print (stack)) + + (print "--args--\n") + (print (args)) + (let (associate "bbb" 3) + (print (args)) + (print (args 1)) + ) + + (print "--and--\n") + (print (and 1 4.8 "true") "\n") + (print (and 1 0.0 "true") "\n") + + (print "--or--\n") + (print (or 1 4.8 "true") "\n") + (print (or 1 0.0 "true") "\n") + (print (or 0 0.0 "") "\n") + + (print "--xor--\n") + (print (xor 1 4.8 "true") "\n") + (print (xor 1 0.0 "true") "\n") + + (print "--not--\n") + (print (not 1) "\n") + (print (not "") "\n") + + (print "--=--\n") + (print (= 4 4 5) "\n") + (print (= 4 4 4) "\n") + (print (= (sqrt -1) .nan) "\n") + (print (= .nan .nan) "\n") + (print (= .infinity .infinity) "\n") + (print (= .infinity -.infinity) "\n") + + (print "--!=--\n") + (print (!= 4 4) "\n") + (print (!= 4 5) "\n") + (print (!= 4 4 5) "\n") + (print (!= 4 4 4) "\n") + (print (!= 4 4 "hello" 4) "\n") + (print (!= 4 4 4 1 3.0 "hello") "\n") + (print (!= 1 2 3 4 5 6 "hello") "\n") + + (print "--<--\n") + (print (< 4 5) "\n") + (print (< 4 4) "\n") + (print (< 4 5 6) "\n") + (print (< 4 5 6 5) "\n") + + (print "--<=--\n") + (print (<= 4 5) "\n") + (print (<= 4 4) "\n") + (print (<= 4 5 6) "\n") + (print (<= 4 5 6 5) "\n") + (print (<= (null) 2) "\n") + (print (<= 2 .nan) "\n") + + (print "-->--\n") + (print (> 6 5) "\n") + (print (> 4 4) "\n") + (print (> 6 5 4) "\n") + (print (> 6 5 4 5) "\n") + + (print "-->=--\n") + (print (>= 6 5) "\n") + (print (>= 4 4) "\n") + (print (>= 6 5 4) "\n") + (print (>= 6 5 4 5) "\n") + (print (>= (null) 2) "\n") + (print (>= 2 .nan) "\n") + + (print "--~--\n") + (print (~ 1 4 5) "\n") + (print (~ 1 4 "a") "\n") + + (print "--!~--\n") + (print (!~ "true" "false" (list 3 2)) "\n") + (print (!~ "true" 1 (list 3 2)) "\n") + + (print "--rand--\n") + (print (rand) "\n") + (print (rand) "\n") + + (print 
(rand 50) "\n") + (print (rand (list 1 2 4 5 7)) "\n") + + (print (rand (range 0 10)) "\n") + (print (rand (range 0 10) 0) "\n") + (print (rand (range 0 10) 1) "\n") + (print (rand (range 0 10) 10 (true)) "\n") + + (print (rand 50 4) "\n") + + (print "--weighted_rand--\n") + (print (weighted_rand (associate "a" .25 "b" .75)) "\n") + (print (weighted_rand (associate "a" .25 "b" .75) 4) "\n") + + (print (weighted_rand (list (list "a" "b") (list .25 .75)) ) "\n") + (print (weighted_rand (list (list "a" "b") (list .25 .75)) 4) "\n") + (print (weighted_rand (list (list "a" "b") (list 0 0)) 4 ) "\n" ) + + (print "infinity test c or d: " (weighted_rand (associate "a" .25 "b" .75 "c" .infinity "d" .infinity) 4) "\n") + (print "infinity test c or d: " (weighted_rand (list (list "a" "b" "c" "d") (list .25 .75 .infinity .infinity)) 4) "\n") + + ;these should come out somewhere near the correct proportions + (print (zip (lambda (+ (target_value 1) (target_value))) (weighted_rand (associate "a" .25 "b" .5 "c" .25) 100) 1) "\n") + + ;these should come out somewhere near the correct proportions + (print (zip (lambda (+ (target_value 1) (target_value))) (weighted_rand (list (list "a" "b" "c") (list .25 .5 .25)) 100) 1) "\n") + + ;these should be weighted toward smaller numbers + (print + (weighted_rand + (zip + (range 1 10) + (map (lambda + (/ (/ 1 (target_value)) 2) + ) + (range 1 10) + ) + ) + 3 + (true) + ) + "\n") + + (print "--get_rand_seed--\n") + + (print (get_rand_seed) "\n") + + (print "--set_rand_seed--\n") + + (declare (assoc cur_seed (get_rand_seed))) + (print (rand) "\n") + (print (rand) "\n") + (set_rand_seed cur_seed) + (print (rand) "\n") + (print (rand) "\n") + + + (print "--true--\n") + (print (true) "\n") + + (print "--false--\n") + (print (false) "\n") + + (print "--null--\n") + (print (null) "\n") + (print (lambda (null (+ 3 5) 7))) + + (print "--node null--\n") + (print (lambda (#nulltest null))) + + (print "--infinity--\n") + (print .infinity "\n") + (print (- (* 3 .infinity)) "\n") + + (print "--nan--\n") + (print .nan "\n") + + (print "--list--\n") + (print (list "a" 1 "b")) + + (print "--associate--\n") + (print (associate "a" 1 "b" 2 "c" 3 4 "d")) + + (print "--assoc--\n") + (print (assoc b 2 c 3)) + + (print "--get_type--\n") + (print (get_type (lambda (+ 3 4)))) + + (print "--get_type_string--\n") + (print (get_type_string (lambda (+ 3 4))) "\n") + (print (get_type_string "hello") "\n") + + (print "--set_type--\n") + (print (set_type (lambda (+ 3 4)) "-")) + (print (set_type (associate "a" 4 "b" 3) "list")) + (print (set_type (associate "a" 4 "b" 3) (list))) + (print (set_type (list "a" 4 "b" 3) "assoc")) + (print (call (set_type (list 1 0.5 "3.2" 4) "+")) "\n") + (print (set_type + (list + (set_labels (lambda (+ 3 4)) (list "react") ) + ) + "parallel" + )) + (print "--format--\n") + (print + (map (lambda + (format (target_value) "int8" "number") + ) + (explode "abcdefgﬗꭖϡ" 1) + ) + ) + + (print "1: " (format 65 "number" "int8") "\n") + (print "2: " (format (format -100 "number" "double") "double" "number") "\n") + (print "3: " (format (format -100 "number" "float") "float" "number") "\n") + (print "4: " (format (format 100 "number" "uint32") "uint32" "number") "\n") + (print "5: " (format (format 123456789 "number" "UINT32") "uint32" "number") "\n") + (print "6: " (format (format 123456789 "number" "UINT32") "UINT32" "number") "\n") + (print "7: " (format (format 14294967296 "number" "uint64") "uint64" "number") "\n") + + (print "8: " (format "A" "int8" "number") "\n") + 
(print "9: " (format "ôBT3_+}ÿ" "double" "number") "\n") + (print "10: " (format -100 "float" "number") "\n") + (print "11: " (format 65 "uint8" "string") "\n") + + (print "12: " (format 254 "uint8" "Base16") "\n") + (print "13: " (format "AAA" "string" "Base16") "\n") + (print "14: " (format "414141" "Base16" "string") "\n") + + (print "15: " (format "Many hands make light work." "string" "Base64") "\n") + (print "16: " (format "Many hands make light work.." "string" "Base64") "\n") + (print "17: " (format "Many hands make light work..." "string" "Base64") "\n") + (print "18: " (format "TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsu" "Base64" "string") "\n") + (print "19: " (format "TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsuLg==" "Base64" "string") "\n") + (print "19: " (format "TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsuLi4=" "Base64" "string") "\n") + (print "20: " (format "[{\"a\" : 3, \"b\" : 4}, {\"c\" : \"c\"}]" "json" "code")) + (print "21: " (format (list (assoc a 3 b 4) (assoc c "c" d (null))) "code" "json") "\n") + (print "22: " (format (list (assoc a 3 b 4) (assoc c "c" d (null))) "code" "json" (null) (assoc sort_keys (true)) ) "\n") + (print "23: " (format (assoc "a" 1 "b" 2 "c" 3 "d" 4 "e" (list "a" "b" .nan .infinity)) "code" "yaml") "\n") + (print "24: " (format (assoc "a" 1 "b" 2 "c" 3 "d" 4 "e" (list "a" "b" .nan .infinity)) "code" "yaml" (null) (assoc "sort_keys" (true))) "\n") + (print "25: " (format "a: 1" "yaml" "code")) + + ;current date + (print "current date-time in epoch: " (format (system_time) "number" "date:%Y-%m-%d-%H.%M.%S") "\n") + + (print (format 1591503779 "number" "date:%F %T") "\n") + + ;month-year dates + (print (format "Feb 2014" "date:%b %Y" "number" ) "\n") + (print (format "2014-Feb" "date:%Y-%h" "number" ) "\n") + (print (format "02/2014" "date:%m/%Y" "number" ) "\n") + + ;erroneous date + (print (format 1591505665002 "number" "date:%F %T") "\n") + + (print (format 1591330905 "number" "date:%F %T") "\n") + (print (format 1591330905 "number" "date:%c %Z") "\n") + (print (format 1591330905 "number" "date:%S") "\n") + (print (format 1591330905 "number" "date:%Oe") "\n") + + ;erroneous format strings + (print (format 1591330905 "number" "date:%s") "\n") + (print (format 1591330905 "number" "date:%s%") "\n") + (print (format 1591330905 "number" "date:%a%b%c%d%e%f") "\n") + (print (format "abcd" "date:%Y-%m-%d" "date:%A, %b %d, %Y" (assoc locale "en_US") (assoc locale "es_ES")) "\n") + + (print (format "2020-06-07" "date:%Y-%m-%d" "date:%A, %b %d, %Y" (assoc locale "en_US") (assoc locale "etete123")) "\n") + (print (format "2020-06-07" "date:%Y-%m-%d" "date:%A, %b %d, %Y" (assoc locale "notalocale") (assoc locale "es_ES")) "\n") + + ;converting from strings + (print (format "2020-06-07" "date:%Y-%m-%d" "number") "\n") + (print (format "2020-06-07" "date:%Y-%m-%d" "date:%b %d, %Y") "\n") + (print (format "2020-06-07" "date:%Y-%m-%d" "date:%A, %b %d, %Y" (assoc locale "en_US") (assoc locale "es_ES")) "\n") + + (print (format "1970-01-08 11.33.48" "date:%Y-%m-%d %H.%M.%S" "number") "\n") + (print (format "1960-01-08 11.33.48" "date:%Y-%m-%d %H.%M.%S" "number") "\n") + + (print + (format + (format "1960-01-08 11.33.48" "date:%Y-%m-%d %H.%M.%S" "number") + "number" + "date:%Y-%m-%d %H.%M.%S" + ) + "\n") + + (print + (format + (+ 0.01 (format "1960-01-08 11.33.48" "date:%Y-%m-%d %H.%M.%S" "number")) + "number" + "date:%Y-%m-%d %H.%M.%S" + ) + "\n") + + (print "--get_labels--\n") + (print (get_labels + ( #labelA lambda #labelB + (true)))) + + (print "--get_all_labels--\n") + (print 
(get_all_labels + (lambda (#label21 print "hello world: " (* #label-number-22 3 4) #label23 " and " (* 1 2) )) )) + + (print (get_all_labels (lambda + ( #labelA #labelQ * #labelB + (+ 1 3) 2)))) + + (print (get_all_labels (lambda + ( #labelA #labelQ * #labelB + (+ 1 #labelA 3) 2)))) + + (print "--set_labels--\n") + (print (set_labels + ( lambda + (#labelC true)) (list "labelD" "labelE"))) + + (print "--zip_labels--\n") + (print (zip_labels (list "l1" "l2" "l3") (list 1 2 3))) + + (print "--get_comments--\n") + (print (get_comments + ;this is a comment + (lambda ;comment too + (true))) "\n") + + (print "--set_comments--\n") + (print (set_comments + ;this is a comment + (lambda ;comment too + (true)) "new comment")) + + (print "--get_concurrency--\n") + (print (get_concurrency + (lambda (print "hello")) + ) "\n") + + (print (get_concurrency + (lambda ||(print "hello")) + ) "\n") + + (print (get_concurrency + (set_concurrency + (lambda (print "hello")) + (true) + ) + ) "\n") + + (print "--set_concurrency--\n") + + (print (set_concurrency + (lambda (print "hello")) + (true) + ) "\n") + + (print (set_concurrency + (lambda + ;complex test + #somelabel (assoc a "hello" b 4) + ) + (true) + ) "\n") + + (print "--get_value--\n") + (print (get_value + ;this is a comment + (lambda ;comment too + #withalabel (true)))) + + (print "--set_value--\n") + (print (set_value + ;this is a comment + (lambda ;comment too + (true)) 3) "\n") + + (print "--explode--\n") + (print (explode "abcdefgﬗꭖϡ")) + (print (explode "abcdefgﬗꭖϡ" 1)) + (print (explode "abcdefgﬗꭖϡ" 2)) + (print (explode "abcdefgﬗꭖϡ" 3)) + (print (explode "abcdefgﬗꭖϡ" 4)) + + (print "--split--\n") + + (print (split "hello world")) + (print (split "hello world" " ")) + (print (split "hello\r\nworld\r\n!" "\r\n")) + (print (split "hello world !" 
"\\s" 1)) + (print (split "hello to the world" "to" (null) 2)) + + (print (split "abcdefgﬗꭖϡ")) + (print (split "abc de fgﬗꭖϡ" " ")) + (print (split "abc\r\nde\r\nfgﬗꭖϡ" "\r\n")) + (print (split "abc de fgﬗꭖϡ" " " 1)) + (print (split "abc de fgﬗꭖϡ" " de " (null) 4)) + + (print "--substr--\n") + + (print (substr "hello world") "\n") + (print (substr "hello world" 1) "\n") + (print (substr "hello world" 1 8) "\n") + (print (substr "hello world" 1 100) "\n") + (print (substr "hello world" 1 -1) "\n") + (print (substr "hello world" -4 -1) "\n") + (print (substr "hello world" -4 -1 (null) 1) "\n") + (print (substr "hello world" 1 3 "x") "\n") + + (print (substr "hello world" "(e|o)") "\n") + (print (substr "hello world" "[h|w](e|o)") "\n") + + (print (substr "hello world" "[h|w](e|o)" 1) "\n") + (print (substr "hello world" "[h|w](e|o)" "all") "\n") + (print (substr "hello world" "(([h|w])(e|o))" "all") "\n") + + (print (substr "hello world" "[h|w](e|o)" -1) "\n") + (print (substr "hello world" "[h|w](e|o)" "submatches") "\n") + + (print (substr "hello world" "(([h|w])(e|o))" "submatches") "\n") + (print (substr "hello world" "(?:([h|w])(?:e|o))" "submatches") "\n") + + ;invalid syntax test + (print (substr "hello world" "(?([h|w])(?:e|o))" "submatches") "\n") + + (print (substr "hello world" "(e|o)" (null) "[$&]") "\n") + (print (substr "hello world" "(e|o)" 2 "[$&]") "\n") + + (print (substr "abcdefgﬗꭖϡ") "\n") + (print (substr "abcdefgﬗꭖϡ" 1) "\n") + (print (substr "abcdefgﬗꭖϡ" 1 8) "\n") + (print (substr "abcdefgﬗꭖϡ" 1 100) "\n") + (print (substr "abcdefgﬗꭖϡ" 1 -1) "\n") + (print (substr "abcdefgﬗꭖϡ" -4 -1) "\n") + (print (substr "abcdefgﬗꭖϡ" -4 -1 (null) 1) "\n") + (print (substr "abcdefgﬗꭖϡ" 1 3 "x") "\n") + + (print "--concat--\n") + (print (concat "hello" " " "world") "\n") + + (print "--crypto_sign and crypto_sign_verify--\n") + (declare (zip (list "public_sign_key" "secret_sign_key") (system "sign_key_pair"))) + (declare (assoc message "hello")) + (declare (assoc signature (crypto_sign message secret_sign_key))) + (print "valid signature: " (crypto_sign_verify message public_sign_key signature)) + + (print "--encrypt and decrypt--\n") + (print "symmetric key encryption\n") + (declare (zip (list "public_encrypt_key" "secret_encrypt_key") (system "encrypt_key_pair"))) + (declare (assoc encrypted (encrypt message secret_encrypt_key "1234"))) + (print "decrypted: " (decrypt encrypted secret_encrypt_key "1234") "\n") + + (print "public key encryption\n") + (declare (zip (list "alice_public_encrypt_key" "alice_secret_encrypt_key") (system "encrypt_key_pair"))) + (declare (zip (list "bob_public_encrypt_key" "bob_secret_encrypt_key") (system "encrypt_key_pair"))) + (assign (assoc encrypted (encrypt message bob_public_encrypt_key "1234" alice_secret_encrypt_key))) + (print "decrypted: " (decrypt encrypted alice_public_encrypt_key "1234" bob_secret_encrypt_key) "\n") + + (print "--print--\n") + (print (list 0 1 10 12 100 120 122 1000 1000.123 10000 100000 .1 .01 .001 .0001 .00001 .0000123456789 1.2345e-149) ) + (print (list -0 -1 -10 -12 -100 -120 -122 -1000 -1000.123 -10000 -100000 -.1 -.01 -.001 -.0001 -.00001 -.0000123456789 -1.2345e-149) ) + (print .nan "\n" .infinity "\n" (true) "\n" (false) "\n") + + (print "--total_size--\n") + (print (total_size (list 1 2 3 (associate "a" 3 "b" 4) (list 5 6))) "\n") + + (print "--mutate--\n") + (print (mutate + (lambda (list 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (associate "a" 1 "b" 2))) + 0.4)) + + (print (mutate + (lambda (list 1 2 3 4 + (associate "alpha" 
5 "beta" 6) + (associate "nest" + (associate "count" (list 7 8 9)) + "end" (list 10 11 12)))) + 0.2 + (associate "+" 0.5 "-" 0.3 "*" 0.2) + (associate "change_type" 0.08 "delete" 0.02 "insert" 0.9) + )) + + (print "--commonality--\n") + (print (commonality + (lambda (seq 2 (get_entity_comments) 1)) + (lambda (seq 2 1 4 (get_entity_comments))) + ) "\n") + + (print (commonality + (list 1 2 3 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6)) + (list 1 2 3 (associate "c" 3 "b" 4) (lambda (if true 1 (parallel 1 (get_entity_comments)))) (list 5 6)) + ) "\n") + + (print (commonality .infinity 3) "\n") + (print (commonality .nan 3) "\n") + (print (commonality .infinity .infinity) "\n") + (print (commonality .infinity -.infinity) "\n") + (print (commonality "hello" "hello") "\n") + (print (commonality "hello" "hello" (true)) "\n") + (print (commonality "hello" "el") "\n") + (print (commonality "hello" "el" (true)) "\n") + (print (commonality "el" "hello" (true)) "\n") + + (print (commonality + (lambda + (assoc a 1 b 2 c 3) + ) + (lambda + (if x + (assoc a 1 b 2 c 3) + (false) + ) + ) + ) "\n") + + (print (commonality + (list 1 2 3) + (list + (list 1 2 3) + ) + ) "\n" + ) + (print (commonality + (list 1 2 3) + (lambda (null 1 2 3)) + ) "\n" + ) + + (print "--edit_distance--\n") + (print (edit_distance + (lambda (seq 2 (get_entity_comments) 1)) + (lambda (seq 2 1 4 (get_entity_comments))) + ) "\n") + + (print (edit_distance + (list 1 2 3 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6)) + (list 1 2 3 (associate "c" 3 "b" 4) (lambda (if true 1 (parallel 1 (get_entity_comments)))) (list 5 6)) + ) "\n") + + (print (edit_distance "hello" "hello") "\n") + (print (edit_distance "hello" "hello" (true)) "\n") + (print (edit_distance "hello" "el") "\n") + (print (edit_distance "hello" "el" (true)) "\n") + (print (edit_distance "el" "hello" (true)) "\n") + + (print (edit_distance + (list 1 2 3) + (lambda (parallel + (list 1 2 3) + )) + ) "\n" + ) + + (print "--intersect--\n") + (print (intersect + (list 1 (lambda (- 4 2)) (associate "a" 3 "b" 4)) + (list 1 (lambda (- 4 2)) (associate "c" 3 "b" 4)) + )) + + (print (intersect + (lambda (seq 2 (get_entity_comments) 1)) + (lambda (seq 2 1 4 (get_entity_comments))) + )) + + (print (intersect + (lambda (parallel 2 (get_entity_comments) 1)) + (lambda (parallel 2 1 4 (get_entity_comments))) + )) + + (print (intersect + (list 1 2 3 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) #label-not-1 1))) (list 5 6)) + (list 1 2 3 (associate "c" 3 "b" 4) (lambda (if true 1 (parallel #label-not-1 1 (get_entity_comments)))) (list 5 6)) + )) + + (print (intersect + (lambda (list 1 (associate "a" 3 "b" 4))) + (lambda (list 1 (associate "c" 3 "b" 4))) + )) + + (print (intersect + (lambda (replace 4 2 6 1 7)) + (lambda (replace 4 1 7 2 6)) + )) + + (print (intersect + (lambda (list + ;comment 1 + ;comment 2 + ;comment 3 + 1 + 3 5 7 9 11 13)) + (lambda (list + ;comment 2 + ;comment 3 + ;comment 4 + 1 + 4 6 8 10 12 14)) + )) + + (print (intersect + (list 1 2 3) + (list (list 1 2 3)) + ) "\n" + ) + + (print "--union--\n") + + (print (union + (lambda (seq 2 (get_entity_comments) 1)) + (lambda (seq 2 1 4 (get_entity_comments))) + )) + + (print (union + (list 1 (lambda (- 4 2)) (associate "a" 3 "b" 4)) + (list 1 (lambda (- 4 2)) (associate "c" 3 "b" 4)) + )) + + (print (union + (lambda (parallel 2 (get_entity_comments) 1)) + (lambda (parallel 2 1 4 (get_entity_comments))) + )) + + 
(print (union + (list 1 2 3 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) #label-not-1 1))) (list 5 6)) + (list 1 2 3 (associate "c" 3 "b" 4) (lambda (if true 1 (parallel #label-not-1 1 (get_entity_comments)))) (list 5 6)) + )) + + (print (union + (lambda (list 1 (associate "a" 3 "b" 4))) + (lambda (list 1 (associate "c" 3 "b" 4))) + )) + + (print (union (list 3 2) (list 3 4) ) ) + (print (union (list 2 3) (list 3 2 4) ) ) + + (print (union + (lambda (list + ;comment 1 + ;comment 2 + ;comment 3 + 1 + 2 + 3 5 7 9 11 13)) + (lambda (list + ;comment 2 + ;comment 3 + ;comment 4 + 1 + ;comment x + 2 + 4 6 8 10 12 14)) + )) + + (print (union + (list 1 2 3) + (list (list 1 2 3)) + ) "\n" + ) + (print (union + (list (list 1 2 3)) + (list 1 2 3) + ) "\n" + ) + + (print (union + (list 1 2 3) + (lambda (parallel + (list 1 2 3) + )) + ) "\n" + ) + + (print "--difference--\n") + (print (difference + (lambda (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13)) + (lambda (list a 2 c 4 d 6 q 8 e 10 f 12 g 14)) + )) + (print (difference + (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13) + (assoc a 2 c 4 d 6 q 8 e 10 f 12 g 14) + )) + (print (difference + (lambda (list 1 2 4 7 10 12 13)) + (lambda (list 2 4 6 8 10 12 14)) + )) + (print (difference + (lambda (assoc a 1 b 2 c 4 d 7 e 10 f 12 g 13)) + (lambda (assoc a 2 c 4 d 6 q 8 e 10 f 12 g 14)) + )) + + (print (difference + (lambda (assoc a 1 g (list 1 2))) + (lambda (assoc a 2 g (list 1 4))) + )) + + (print (difference + (lambda (associate a 1 g (list 1 2))) + (lambda (associate a 2 g (list 1 4))) + )) + +(print (difference + (zip (list 1 2 3 4 5)) + (append (zip (list 2 6 5)) (assoc a 1)) + )) + + (print (difference + (zip (list 1 2 3 4 5)) + (zip (list 2 6 5)) + )) + + (print (difference + (zip (list 1 2 5)) + (zip (list 2 6 5)) + )) + + (let (assoc + x (lambda (list 6 (list 1 2))) + y (lambda (list 7 (list 1 4))) + ) + + (print (difference x y)) + (print (call (difference x y) (assoc _ x))) + ) + + ;test when the difference is not a list or assoc + (let (assoc + x (lambda (list (+ 0 1) (list 1 2))) + y (lambda (list (+ 7 8) (list 1 4))) + ) + + (print (difference x y)) + (print (call (difference x y) (assoc _ x))) + ) + + (let (assoc + x (lambda (list 6 (list (list "a" "b") 1 2))) + y (lambda (list 7 (list (list "a" "x") 1 4))) + ) + + (print (difference x y)) + (print (call (difference x y) (assoc _ x))) + ) + + (print "--mix--\n") + (print (mix + (lambda (list 1 3 5 7 9 11 13)) + (lambda (list 2 4 6 8 10 12 14)) + 0.5 0.5 0)) + + (print (mix + (lambda (list + ;comment 1 + ;comment 2 + ;comment 3 + 1 + 3 5 7 9 11 13)) + (lambda (list + ;comment 2 + ;comment 3 + ;comment 4 + 1 + 4 6 8 10 12 14)) + 0.5 0.5 0)) + + (print (mix + (lambda (list 1 2 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6)) ) + (lambda (list 1 5 3 (associate "a" 3 "b" 4) (lambda (if false 1 (parallel (get_entity_comments) (lambda (print (list 2 9))) ))) ) ) + 0.8 0.8 0.5)) + + (print (mix + (lambda (list 1 2 (associate "a" 3 "b" 4) (lambda (if true 1 (parallel (get_entity_comments) 1))) (list 5 6)) ) + (lambda (list 1 5 3 (assoc a 3 b 4) (lambda (if false 1 (seq (get_entity_comments) (lambda (print (list 2 9))) ))) ) ) + 0.8 0.8 1.0)) + + (print (mix + (lambda (list (true) 3 5 7 9 11 13)) + (lambda (list 2 4 6 8 10 12 14)) + 0.5 0.5 1.0)) + + (print (mix + (lambda (list (true) 3 5 7 9 11 13)) + (lambda (list 2 4 6 8 10 12 14)) + 0.5 0.5 -1)) + + (print (mix 1 4 0.5 0.5 -1) "\n") + (print (mix 1 4 0.5 0.5 -0.8) "\n") + (print (mix 1 4 
0.5 0.5 0.5) "\n") + (print (mix 1 4 0.5 0.5 1) "\n") + + (print (mix "abcdexyz" "abcomxyz" 0.5 0.5) "\n") + (print (mix "abcdexyz" "abcomxyz" 0.5 0.5) "\n") + (print (mix "abcdexyz" "abcomxyz" 0.5 0.5) "\n") + + (print "--mix_labels--\n") + (print (mix_labels + (lambda (list 1 #mixtest1 2 #mixtest2 (associate "a" 3 "b" 4) (lambda (if #mixtest3 true 1 (parallel (get_entity_comments) #mixtest4 1))) (list 5 6)) ) + (lambda (list 1 #mixtest1 5 #mixtest2 3 (associate "a" 3 "b" 4) (lambda (if #mixtest3 false 1 (parallel (get_entity_comments) #mixtest4 (lambda (print (list 2 9))) ))) ) ) + 0.5)) + + (print "--total_entity_size--\n") + + (create_entities "MergeEntity1" (lambda (associate "a" 3 "b" 4)) ) + (create_entities (list "MergeEntity1" "MergeEntityChild1") (lambda (associate "x" 3 "y" 4)) ) + (create_entities (list "MergeEntity1" "MergeEntityChild2") (lambda (associate "p" 3 "q" 4)) ) + (create_entities (list "MergeEntity1") (lambda (associate "E" 3 "F" 4)) ) + (create_entities (list "MergeEntity1") (lambda (associate "e" 3 "f" 4 "g" 5 "h" 6)) ) + + (create_entities "MergeEntity2" (lambda (associate "c" 3 "b" 4)) ) + (create_entities (list "MergeEntity2" "MergeEntityChild1") (lambda (associate "x" 3 "y" 4 "z" 5)) ) + (create_entities (list "MergeEntity2" "MergeEntityChild2") (lambda (associate "p" 3 "q" 4 "u" 5 "v" 6 "w" 7)) ) + (create_entities (list "MergeEntity2") (lambda (associate "E" 3 "F" 4 "G" 5 "H" 6)) ) + (create_entities (list "MergeEntity2") (lambda (associate "e" 3 "f" 4)) ) + + (print (total_entity_size "MergeEntity1") "\n") + (print (total_entity_size "MergeEntity2") "\n") + + (print "--flatten_entity--\n") + + (create_entities "FlattenTest" (lambda + (parallel ##a (rand) ) + ) ) + (print (call_entity "FlattenTest" "a") "\n") + (print (get_entity_rand_seed "FlattenTest") "\n") + + (create_entities (list "FlattenTest" "DeepRand") (lambda + (parallel ##a (rand) ) + ) ) + (declare (assoc seed (get_entity_rand_seed "FlattenTest"))) + (print (call_entity "FlattenTest" "a") "\n") + (set_entity_rand_seed "FlattenTest" seed) + (print (call_entity "FlattenTest" "a") "\n") + + (set_entity_rand_seed "FlattenTest" seed) + (print (call_entity (list "FlattenTest" "DeepRand") "a") "\n") + (set_entity_rand_seed "FlattenTest" seed) + (print (call_entity (list "FlattenTest" "DeepRand") "a") "\n") + + (print "flatten restore with seeds test\n") + (let (assoc fe (flatten_entity "FlattenTest")) + (print fe) + (print (flatten_entity (call fe))) + (print (difference_entities "FlattenTest" (call fe))) + ) + + (print "flatten restore without seeds test\n") + (let (assoc fe (flatten_entity "FlattenTest" (false))) + (print fe) + (print (flatten_entity (call fe))) + (print (difference_entities "FlattenTest" (call fe))) + ) + + (print "flatten restore with parallel\n") + (let (assoc fe (flatten_entity "FlattenTest" (true) (true))) + (print fe) + (print (flatten_entity (call fe))) + (print (difference_entities "FlattenTest" (call fe))) + ) + + (print "--mutate_entity--\n") + + (create_entities + "MutateEntity" + (lambda (list 1 2 3 4 5 6 7 8 9 10 11 12 13 14 (associate "a" 1 "b" 2))) + ) + (mutate_entity "MutateEntity" 0.4 "MutatedEntity") + (mutate_entity "MutateEntity" 0.5 "MutatedEntity2") + (mutate_entity "MutateEntity" 0.5 "MutatedEntity3" + (associate "+" 0.5 "-" 0.3 "*" 0.2) + (associate "change_type" 0.08 "delete" 0.02 "insert" 0.9) + ) + + (print (retrieve_entity_root "MutatedEntity") "\n") + (print (retrieve_entity_root "MutatedEntity2") "\n") + (print (retrieve_entity_root "MutatedEntity3") 
"\n") + + + (print "--commonality_entities--\n") + + (print (commonality_entities "MergeEntity1" "MergeEntity2") "\n") + + (print "--edit_distance_entities--\n") + + (print (edit_distance_entities "MergeEntity1" "MergeEntity2") "\n") + + (print "--intersect_entities--\n") + + (intersect_entities "MergeEntity1" "MergeEntity2" "AndedEntities") + (print (retrieve_entity_root "AndedEntities")) + (map + (lambda (seq + (print (target_value) "\n") + (print (retrieve_entity_root (list "AndedEntities" (target_value 1)))) + )) + (contained_entities "AndedEntities") + ) + + + (print "--union_entities--\n") + + (union_entities "MergeEntity1" "MergeEntity2" "OredEntities") + (print (retrieve_entity_root "OredEntities")) + (map + (lambda (seq + (print (target_value) "\n") + (print (retrieve_entity_root (list "OredEntities" (target_value 1)))) + )) + (contained_entities "OredEntities") + ) + + (create_entities "ComplexMergeEntity1" (lambda (parallel #p (null))) ) + (create_entities (list "ComplexMergeEntity1") (lambda (associate "E" 3 "F" 4)) ) + (create_entities (list "ComplexMergeEntity1") (lambda (associate "e" 3 "f" 4 "g" 5 "h" 6)) ) + + (create_entities "ComplexMergeEntity2" (lambda (parallel #p (null))) ) + (create_entities (list "ComplexMergeEntity2") (lambda (associate "E" 3 "F" 4 "G" 5 "H" 6)) ) + (create_entities (list "ComplexMergeEntity2") (lambda (associate "e" 3 "f" 4)) ) + + (assign_to_entities "ComplexMergeEntity1" (assoc p (contained_entities "ComplexMergeEntity1"))) + (assign_to_entities "ComplexMergeEntity2" (assoc p (contained_entities "ComplexMergeEntity2"))) + + (union_entities "ComplexMergeEntity1" "ComplexMergeEntity2" "ComplexOredEntities") + (print (retrieve_entity_root "ComplexOredEntities")) + (map + (lambda (seq + (print (target_value) "\n") + (print (retrieve_entity_root (list "ComplexOredEntities" (target_value 1)))) + )) + (contained_entities "ComplexOredEntities") + ) + + (print "--difference_entities--\n") + (print (difference_entities "MergeEntity1" "MergeEntity1")) + (print (difference_entities "MergeEntity1" "MergeEntity2")) + (print (call (difference_entities "MergeEntity1" "MergeEntity2") (assoc _ "MergeEntity1")) "\n") + + (create_entities "DiffEntity1" (lambda (associate "a" 3 "b" 4)) ) + (create_entities (list "DiffEntity1" "DiffEntityChild1") (lambda (associate "x" 3 "y" 4 "z" 6)) ) + (create_entities (list "DiffEntity1" "DiffEntityChild1" "DiffEntityChild2") (lambda (associate "p" 3 "q" 4 "u" 5 "v" 6 "w" 7)) ) + (create_entities (list "DiffEntity1" "DiffEntityChild1" "DiffEntityChild2" "DiffEntityChild3") (lambda (associate "e" 3 "p" 4 "a" 5 "o" 6 "w" 7)) ) + (create_entities (list "DiffEntity1" "OnlyIn1") (lambda (associate "m" 4)) ) + (create_entities (list "DiffEntity1") (lambda (associate "E" 3 "F" 4)) ) + (create_entities (list "DiffEntity1") (lambda (associate "e" 3 "f" 4 "g" 5 "h" 6)) ) + + (create_entities "DiffEntity2" (lambda (associate "c" 3 "b" 4)) ) + (create_entities (list "DiffEntity2" "DiffEntityChild1") (lambda (associate "x" 3 "y" 4 "z" 5)) ) + (create_entities (list "DiffEntity2" "DiffEntityChild1" "DiffEntityChild2") (lambda (associate "p" 3 "q" 4 "u" 5 "v" 6 "w" 7)) ) + (create_entities (list "DiffEntity2" "DiffEntityChild1" "DiffEntityChild2" "DiffEntityChild3") (lambda (associate "e" 3 "p" 4 "a" 5 "o" 6 "w" 7)) ) + (create_entities (list "DiffEntity2" "OnlyIn2") (lambda (associate "o" 6)) ) + (create_entities (list "DiffEntity2") (lambda (associate "E" 3 "F" 4 "G" 5 "H" 6)) ) + (create_entities (list "DiffEntity2") (lambda (associate "e" 
3 "f" 4)) ) + + (print (contained_entities "DiffEntity2")) + + (print (difference_entities "DiffEntity1" "DiffEntity2")) + + (let (assoc new_entity + (call (difference_entities "DiffEntity1" "DiffEntity2") (assoc _ "DiffEntity1"))) + (print new_entity "\n") + (print (retrieve_entity_root new_entity)) + (print (retrieve_entity_root (list new_entity "DiffEntityChild1"))) + (print (contained_entities new_entity)) + ) + + (create_entities "DiffContainer" null) + + (create_entities (list "DiffContainer" "DiffEntity1") (lambda (associate "a" 3 "b" 4)) ) + (create_entities (list "DiffContainer" "DiffEntity1" "DiffEntityChild1") (lambda (associate "x" 3 "y" 4 "z" 6)) ) + (create_entities (list "DiffContainer" "DiffEntity1" "DiffEntityChild1" "DiffEntityChild2") (lambda (associate "p" 3 "q" 4 "u" 5 "v" 6 "w" 7)) ) + (create_entities (list "DiffContainer" "DiffEntity1" "DiffEntityChild1" "DiffEntityChild2" "DiffEntityChild3") (lambda (associate "e" 3 "p" 4 "a" 5 "o" 6 "w" 7)) ) + (create_entities (list "DiffContainer" "DiffEntity1" "OnlyIn1") (lambda (associate "m" 4)) ) + (create_entities (list "DiffContainer" "DiffEntity1") (lambda (associate "E" 3 "F" 4)) ) + (create_entities (list "DiffContainer" "DiffEntity1") (lambda (associate "e" 3 "f" 4 "g" 5 "h" 6)) ) + + (create_entities (list "DiffContainer" "DiffEntity2") (lambda (associate "c" 3 "b" 4)) ) + (create_entities (list "DiffContainer" "DiffEntity2" "DiffEntityChild1") (lambda (associate "x" 3 "y" 4 "z" 6)) ) + (create_entities (list "DiffContainer" "DiffEntity2" "DiffEntityChild1" "DiffEntityChild2") (lambda (associate "p" 3 "q" 4 "u" 5 "v" 6 "w" 7)) ) + (create_entities (list "DiffContainer" "DiffEntity2" "DiffEntityChild1" "DiffEntityChild2" "DiffEntityChild3") (lambda (associate "e" 3 "p" 4 "a" 5 "o" 6 "w" 7)) ) + (create_entities (list "DiffContainer" "DiffEntity2" "OnlyIn2") (lambda (associate "o" 6)) ) + (create_entities (list "DiffContainer" "DiffEntity2") (lambda (associate "E" 3 "F" 4 "G" 5 "H" 6)) ) + (create_entities (list "DiffContainer" "DiffEntity2") (lambda (associate "e" 3 "f" 4)) ) + + (print (difference_entities (list "DiffContainer" "DiffEntity1") (list "DiffContainer" "DiffEntity2") )) + + (let (assoc new_entity + (call (difference_entities (list "DiffContainer" "DiffEntity1") (list "DiffContainer" "DiffEntity2") ) + (assoc _ (list "DiffContainer" "DiffEntity1") ))) + (print new_entity "\n") + (print (retrieve_entity_root new_entity)) + (print (retrieve_entity_root (list new_entity "DiffEntityChild1"))) + (print (contained_entities new_entity)) + ) + + (print "--mix_entities--\n") + (mix_entities "MergeEntity1" "MergeEntity2" 0.5 0.5 0.5 0.2 "MixedEntities") + (print (retrieve_entity_root "MixedEntities")) + (map + (lambda (seq + (print (target_value) "\n") + (print (retrieve_entity_root (list "MixedEntities" (target_value 1)))) + )) + (contained_entities "MixedEntities") + ) + + (print "--get_entity_comments--\n") + (print (get_entity_comments) "\n") + + (create_entities "descriptive_entity" (lambda + ;this is a fully described entity + (null + ;some public variable + ##publicvar 1 + ;some private variable + ##!privatevar 2 + ;a variable accessible to contained entities + ##^containervar 3 + ;the function foo + ##foo (declare (assoc + ;the value of x + x + ;the default value of x + 1 + ;the value of y + y 2 + ) + ( + x y) + + ) + + ;returns the api details + ##get_api (seq + + (assoc + "description" + (get_entity_comments) + + "labels" + (map (lambda + (assoc + "description" + (target_value 1) + "parameters" + 
(get_entity_comments (null) (target_index 1) (true)) + ) + ) + (get_entity_comments (null) (null) (true)) + ) + + + ) + + ) + ) + + + )) + + (print (get_entity_comments "descriptive_entity") "\n") + (print (get_entity_comments "descriptive_entity" (null) (true))) + (print (get_entity_comments "descriptive_entity" "foo" (true))) + + (print (call_entity "descriptive_entity" "get_api")) + + (print "--retrieve_entity_root--\n") + (create_entities "SetGetCodeTest" (lambda (list 1 2 ##three 3))) + (print (retrieve_entity_root "SetGetCodeTest")) + (print (retrieve_entity_root "SetGetCodeTest" 1)) + + (print "--assign_entity_roots--\n") + (assign_entity_roots "SetGetCodeTest" (list 4 5 6)) + (print (retrieve_entity_root "SetGetCodeTest")) + + (print "--accum_entity_roots--\n") + (create_entities "AER_test" (lambda (null ##a 1 ##b 2))) + (accum_entity_roots "AER_test" (lambda (list ##c 3))) + (print (retrieve_entity_root "AER_test" 1)) + + (create_entities "AER_test_2" (lambda (null))) + (accum_entity_roots "AER_test_2" (lambda (list ##c 3))) + (print (retrieve_entity_root "AER_test_2" 1)) + + (print "--get_entity_rand_seed--\n") + (create_entities "RandTest" (lambda + (parallel ##a (rand) ) + ) ) + (print (call_entity "RandTest" "a") "\n") + (print (get_entity_rand_seed "RandTest") "\n") + + (print "--set_entity_rand_seed--\n") + (create_entities (list "RandTest" "DeepRand") (lambda + (parallel ##a (rand) ) + ) ) +(assign (assoc seed (get_entity_rand_seed "RandTest"))) +(print (call_entity "RandTest" "a") "\n") +(set_entity_rand_seed "RandTest" seed (false)) +(print (call_entity "RandTest" "a") "\n") + +(print "deep sets\n") +(set_entity_rand_seed "RandTest" seed) +(print (call_entity (list "RandTest" "DeepRand") "a") "\n") +(set_entity_rand_seed "RandTest" seed) +(print (call_entity (list "RandTest" "DeepRand") "a") "\n") + + (print "--get_entity_root_permission--\n") + (create_entities "RootTest" (lambda (print (system_time)) )) + (print (get_entity_root_permission "RootTest") "\n") + + (print "--set_entity_root_permission--\n") + (print (set_entity_root_permission "RootTest" 1) "\n") + (call_entity "RootTest") + (print "\n") + (print (get_entity_root_permission "RootTest") "\n") + (print (set_entity_root_permission "RootTest" 0) "\n") + (call_entity "RootTest") + (print "\n") + (print (get_entity_root_permission "RootTest") "\n") + + (print "--create_entities--\n") + (print (create_entities "MyLibrary" (lambda (+ #three 3 4)) ) "\n") + + (print "--nested create_entities--\n") + (create_entities "EntityWithChildren" (lambda (associate "a" 3 "b" 4)) ) + (create_entities (list "EntityWithChildren" "Child1") (lambda (associate "x" 3 "y" 4)) ) + (create_entities (list "EntityWithChildren" "Child2") (lambda (associate "p" 3 "q" 4)) ) + (print (contained_entities "EntityWithChildren")) + + (print (create_entities "MultipleTest1" (null)) "\n") + (print (create_entities "MultipleTest2" (null)) "\n") + + (print "--clone_entities--\n") + (print (clone_entities "MyLibrary" "MyNewLibrary") "\n") + + (print "--move_entities--\n") + (print (move_entities "MyLibrary" "MyLibrary2") "\n") + + (print "--destroy_entities--\n") + (print (contained_entities)) + (destroy_entities "MyLibrary2") + (print (contained_entities)) + + (destroy_entities "MultipleTest1" "MultipleTest2") + (print (contained_entities)) + + (print "--load--\n") + (print (load "amlg_code/module_test.amlg")) + + (print "load from .json:\n") + (print (load "amlg_code/module_test.json")) + + (print "load from .yaml:\n") + (print (load 
"amlg_code/module_test.yaml")) + + (print "--load_entity--\n") + (print "load from .amlg:\n") + (load_entity "amlg_code/module_test.amlg" "ModuleTest") + (call_entity "ModuleTest" "hello") + (print (flatten_entity "ModuleTest")) + + + (print "--load_persistent_entity--\n") + (load_persistent_entity "amlg_code/persist_module_test.amlg" "PersistModuleTest") + (assign_to_entities "PersistModuleTest" (assoc a 8)) + (assign_to_entities (list "PersistModuleTest" "psm") (assoc a 5)) + (create_entities (list "PersistModuleTest" "NewModule") (lambda (associate "a" 1 "b" 2)) ) + (load_entity "amlg_code/persist_module_test.amlg" "PersistModuleTestResults") + (print (flatten_entity "PersistModuleTestResults")) + (assign_to_entities "PersistModuleTest" (assoc a 1)) + (assign_to_entities (list "PersistModuleTest" "psm") (assoc a 8)) + (destroy_entities (list "PersistModuleTest" "NewModule")) + (load_entity "amlg_code/persist_module_test.amlg" "PersistModuleTestResults2") + (print (flatten_entity "PersistModuleTestResults2")) + + ;test the ability to delete nested persistent entities + (create_entities "pt" (list 1 2 3 4)) + (create_entities (list "pt" "child") (list 5 6 7)) + (create_entities (list "pt" "child" "doublechild1") (list 8 9)) + (create_entities (list "pt" "child" "doublechild2") (list 10 11)) + (store_entity "amlg_code/pt.amlg" "pt") + (destroy_entities "pt") + (load_persistent_entity "amlg_code/pt.amlg" "ptl") + (destroy_entities (list "ptl" "child")) + + ; persistent grandchild test + (print "Load Root:\n" (load_persistent_entity "amlg_code/persistent_tree_test_root.amlg" "PersistTreeRoot") "\n") + (print "Load Inter:\n" (load_entity "amlg_code/persistent_tree_test_inter.amlg" (list "PersistTreeRoot" "PersistTreeInter")) "\n") + (print "Load Leaf:\n" (load_persistent_entity "amlg_code/persistent_tree_test_leaf.amlg" (list "PersistTreeRoot" "PersistTreeInter" "PersistTreeLeaf")) "\n") + (print "Root contained:\n" (contained_entities "PersistTreeRoot") "\n") + (print "Root b:\n" (retrieve_from_entity "PersistTreeRoot" "b") "\n") + (print "Inter contained:\n" (contained_entities (list "PersistTreeRoot" "PersistTreeInter")) "\n") + + (assign_to_entities (list "PersistTreeRoot" "PersistTreeInter" "PersistTreeLeaf") (assoc "f" 2)) + (print "Leaf f:\n" (retrieve_from_entity (list "PersistTreeRoot" "PersistTreeInter" "PersistTreeLeaf") "f") "\n") + (assign_to_entities (list "PersistTreeRoot" "PersistTreeInter" "PersistTreeLeaf") (assoc "f" 6)) + (print "Leaf f:\n" (retrieve_from_entity (list "PersistTreeRoot" "PersistTreeInter" "PersistTreeLeaf") "f") "\n") + + (load_entity "amlg_code/persistent_tree_test_leaf.amlg" (list "PersistTreeRoot" "leaf_backup")) + (call_entity "PersistTreeRoot" "kill_inter") + (store_entity "amlg_code/persistent_tree_test_leaf.amlg" (list "PersistTreeRoot" "leaf_backup")) + (call_entity "PersistTreeRoot" "clean_backup") + + (print "--store--\n") + (store "amlg_code/store_test.amlg" (list 1 2 3 4)) + (print (load "amlg_code/store_test.amlg")) + + ;test escaped labels + (declare (assoc + entity + (first (create_entities + (set_type + (list (set_labels 1 (list ".#blah"))) + (lambda (parallel)) + ) + )) + )) + + (print (retrieve_entity_root entity 1)) + (print "retrieved: " (retrieve_from_entity entity ".#blah") "\n\n") + + (store_entity "amlg_code/escaped_label.amlg" entity) + (load_entity "amlg_code/escaped_label.amlg" "escaped_label") + + (print "loaded from file:\n" (retrieve_entity_root "escaped_label" 1)) + (print "retrieved: " (retrieve_from_entity "escaped_label" 
".#blah") "\n") + + (print "--store other file formats---\n") + (store "amlg_code/text_store_test.txt" "This is text!") + (print "[" (load "amlg_code/text_store_test.txt") "]\n") + + (store "amlg_code/cstl_store_test.cstl" (list "String 1." "String 2." "String 3.")) + (print (load "amlg_code/cstl_store_test.cstl") "\n") + + (store "amlg_code/caml_store_test.caml" (lambda (seq (print "hello"))) ) + (print (load "amlg_code/caml_store_test.caml") "\n") + + ;test escaping contained entity filenames + (create_entities "quackerz?" "test") + (create_entities (list "quackerz?" "!@#$%^&*)(_+=-\'][{}.marbles") (lambda ##blah1 12)) + (create_entities (list "quackerz?" "buklulu is good.amlg") "hello world!") + + (print "contained entities in quackers before file: " (contained_entities "quackerz?") "\n") + + (store_entity "amlg_code/!quackerz.amlg" "quackerz?" (true)) + (load_entity "amlg_code/!quackerz.amlg" "read-back quackers" (true)) + + (print "contained entity in quackers2 loaded back from file: " (contained_entities "read-back quackers") "\n") + + ;CSV + (declare (assoc csv_data + (list + (list 6.4 2.8 5.6 2.2 "virginica") + (list 4.9 2.5 4.5 1.7 "virg\"inica") + (list) + (list "" "" "" (null)) + (list 4.9 3.1 1.5 0.1 "set\nosa" 3) + (list 4.4 3.2 1.3 0.2 "setosa") + ) + )) + (store "amlg_code/csv_store_test.csv" csv_data) + (print (load "amlg_code/csv_store_test.csv") ) + + (print "--store_entity--\n") + (print "store to .amlg:\n") + (assign_to_entities "ModuleTest" (assoc a 2)) + (print (unparse (retrieve_from_entity "ModuleTest" "a") (true) (true))) + (store_entity "amlg_code/module_test2.amlg" "ModuleTest") + (load_entity "amlg_code/module_test2.amlg" "ModuleTest2") + (print (unparse (retrieve_from_entity "ModuleTest" "a") (true) (true))) + (assign_to_entities "ModuleTest" (assoc a 1)) + (store_entity "amlg_code/module_test2.amlg" "ModuleTest") + + (store_entity "amlg_code/module_testc.caml" "ModuleTest") + (load_entity "amlg_code/module_test_c.caml" "ModuleTestDecompressed") + (print "Compression difference: [" (difference_entities "ModuleTest" "ModuleTestDecompressed") "]\n") + + (print "store to .json in amlg format\n") + (store "amlg_code/module_test.json" (list (assoc a 3 b 4) (assoc c "c" d (null))) (false) "amlg") + (print (load "amlg_code/module_test.json" (false) "amlg")) + + (print "store to .json normally\n") + (store "amlg_code/module_test.json" (list (assoc a 3 b 4) (assoc c "c" d (null)))) + (print (load "amlg_code/module_test.json")) + + (print "--contains_entity--\n") + + (print (contains_entity "MyNewLibrary") "\n") + (print (contains_entity (list "MyNewLibrary" "foo")) "\n") + + (print "--contained_entities--\n") + + (create_entities "TestContainerExec" + (lambda (parallel + ##^a 3 + ##b (contained_entities) + ##c (+ x 1) + ##d (call_entity "Child5" "q" (assoc x x)) + ##!e 12 + ##x 4 + ##y 5 + )) + ) + (create_entities (list "TestContainerExec" "Child1") + (lambda (parallel + ##x 3 + ##y 4 + ##!e 7 + ##weight 0.45 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child2") + (lambda (parallel + ##x -1 + ##y -1 + ##weight 0.45 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child3") + (lambda (parallel + ##x 100 + ##y 100 + ##weight 0.02 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child4") + (lambda (parallel + ##x 100 + ##y 100 + ##radius 400 + ##weight 0.02 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child5") + (lambda (parallel + ##p 3 + ##q (+ x (call_container "a")) + ##bar 
"crunchy" + ##weight 0.02 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child6") + (lambda (parallel + ##x 1 + ##y 2 + ##bar "not crunchy" + ##weight 0.02 + ##weight_eq 1 + )) + ) + (create_entities (list "TestContainerExec" "Child7") + (lambda (parallel + ##x 0 + ##y 10 + ##weight 0.02 + ##weight_eq 1 + )) + ) + + (print (contained_entities "TestContainerExec")) + + (print "--query_select--\n") + (print (contained_entities "TestContainerExec" (list + (query_select 3) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 3 1) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 100 2) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 2 0 1) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 2 2 1) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 2 4 1) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 4 (null) (rand)) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_select 4 (null) (rand)) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_not_exists "q") + (query_select 2 3) + ))) + + (print "--query_sample--\n") + (print (contained_entities "TestContainerExec" (list + (query_sample) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_sample 2 ) + ))) + + + (print (contained_entities "TestContainerExec" (list + (query_sample 1 (rand) ) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_sample 1 (null) ) + ))) + + (print "--query_weighted_sample--\n") + + (print (contained_entities "TestContainerExec" (list + (query_weighted_sample "weight") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_weighted_sample "weight") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_weighted_sample "weight" 20 (rand) ) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_weighted_sample "weight" 20 (null) ) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_not_in_entity_list (list "Child1")) + (query_weighted_sample "weight" 10 (rand) ) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_weighted_sample "weight" 10 (rand) ) + (query_not_in_entity_list (list "Child1")) + ))) + + (print "--query_in_entity_list--\n") + (print (contained_entities "TestContainerExec" (list + (query_in_entity_list (list "Child6" "Child7")) + ))) + + (print "--query_not_in_entity_list--\n") + (print (contained_entities "TestContainerExec" (list + (query_in_entity_list (list "Child1" "Child2" "Child3" "Child4" "Child5" "Child6" "Child7" "Child8" "Child9")) + (query_not_in_entity_list (list "Child6" "Child7")) + ))) + + (print "--query_count--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_count) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_not_exists "q") + (query_count) + )) "\n") + + (print "--query_exists--\n") + (print (contained_entities "TestContainerExec" (list + (query_exists "q") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "!e") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_equals "bar" "crunchy") + (query_exists "q") + ))) + + (print "--query_not_exists--\n") + (print (contained_entities "TestContainerExec" (list + (query_not_exists "q") + ))) + + (print (contained_entities "TestContainerExec" (list + 
(query_exists "q") + (query_not_exists "eeee") + ))) + + (print "--query_equals--\n") + (print (contained_entities "TestContainerExec" (list + (query_equals "p" 3) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "bar") + (query_equals "p" 3) + ))) + + (print "--query_not_equals--\n") + (print (contained_entities "TestContainerExec" (list + (query_not_equals "x" 100) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "y") + (query_not_equals "x" 100) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_equals "bar" "crunchy") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_not_equals "x" 100) + ))) + + (print "--query_between--\n") + (print (contained_entities "TestContainerExec" (list + (query_between "x" 0 5) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_between "x" 0 5) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_between "x" -4 5) + (query_between "y" -4 0) + ))) + + (print "--query_not_between--\n") + (print (contained_entities "TestContainerExec" (list + (query_not_between "x" 0 5) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_between "bar" "apple" "kangaroo") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_not_between "bar" "apple" "kangaroo") + ))) + + (print "cascading global query: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_not_between "y" -4 5) + ))) + + (print "--query_among--\n") + + (print "among x = 0, 100: " (contained_entities "TestContainerExec" (list + (query_among "x" (list 100 0)) + ))) + + (print "among x = 0, 100: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_among "x" (list 100 0)) + ))) + + (print "among bar = not crunchy: " (contained_entities "TestContainerExec" (list + (query_among "bar" (list "not crunchy")) + ))) + + (print "among bar = not crunchy: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_among "bar" (list "not crunchy")) + ))) + + + (print "--query_not_among--\n") + + (print "not_among x = 0, 100: " (contained_entities "TestContainerExec" (list + (query_not_among "x" (list 100 0)) + ))) + + (print "not_among x = 0, 100: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_not_among "x" (list 100 0)) + ))) + + (print "not_among bar = not crunchy: " (contained_entities "TestContainerExec" (list + (query_not_among "bar" (list "not crunchy")) + ))) + + (print "not_among bar = not crunchy: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_not_among "bar" (list "not crunchy")) + ))) + + (print "--query_nearest_generalized_distance--\n") + (print "cascading query_not_equals: " + (contained_entities "TestContainerExec" (list + (query_not_equals "x" 0) + (query_nearest_generalized_distance 2 (list "y" ) (list 10) (null) (null) (null) (null) 0.5) + )) + ) + + (print "cascading query_not_in_entity_list: " + (contained_entities "TestContainerExec" (list + (query_not_in_entity_list (list "Child1")) + (query_nearest_generalized_distance 2 (list "y" ) (list 10) (null) (null) (null) (null) 0.5) + )) + ) + + (print "unweighted query: " + (compute_on_contained_entities "TestContainerExec" (list + (query_nearest_generalized_distance 5 (list "y" ) (list 0) (null) (null) (null) (null) 1 1 (null)) + )) + ) + + (print "weighted query: " + (compute_on_contained_entities "TestContainerExec" (list + 
(query_nearest_generalized_distance 5 (list "y" ) (list 0) (null) (null) (null) (null) 1 1 "weight") + )) + ) + + (print "weighted query list of lists: " + (compute_on_contained_entities "TestContainerExec" (list + (query_nearest_generalized_distance 5 (list "y" ) (list 0) (null) (null) (null) (null) 1 1 "weight" (null) (null) (null) (true)) + )) + ) + + (print "weighted query list of lists: " + (compute_on_contained_entities "TestContainerExec" (list + (query_nearest_generalized_distance 5 (list "y" ) (list 0) (null) (null) (null) (null) 1 1 (null) (null) (null) (null) "y") + )) + ) + + (create_entities "OverflowQueryContainer" (null) ) + (create_entities (list "OverflowQueryContainer" "sess") (lambda (null ##.steps (list 1 2)))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 2))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 2))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 2))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 1.92))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 1.82))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 1.2))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 2.2))) + (create_entities "OverflowQueryContainer" (lambda (null ##a 2.1))) + (print + (compute_on_contained_entities "OverflowQueryContainer" + (append + (query_nearest_generalized_distance + 10 ;k + (list "a") ;context_features + (list 1.7) ;( context_values + (null) ;context_weights + (null) ;context_nominal_counts + (null) ;cyclic_feature_lengths + (null) ;context_deviations + 2 + -1 ;dwe + (null) ;entity weight + (rand) + (null) + "precise" + (true) + ) + ) + ) + ) + + (print "test code and string distances\n") + + (create_entities "TestContainerSimilarCode" + (null) + ) + + (create_entities (list "TestContainerSimilarCode" "Child1") + (lambda (null + ##x 1 + ##y 1 + ##s "s1" + ##weight 0.45 + )) + ) + (create_entities (list "TestContainerSimilarCode" "Child2") + (lambda (null + ##x 1 + ##y (list 1 2) + ##s "s2" + ##weight 0.45 + )) + ) + (create_entities (list "TestContainerSimilarCode" "Child3") + (lambda (null + ##x 1 + ##y (null 1 2) + ##s "s333" + ##weight 0.02 + )) + ) + (create_entities (list "TestContainerSimilarCode" "Child4") + (lambda (null + ##x 1 + ##y 100 + ##s "4s1" + ##weight 0.02 + )) + ) + (create_entities (list "TestContainerSimilarCode" "Child5") + (lambda (null + ##x 1 + ##y (list (list 1 2) 2) + ##s "5s5" + ##weight 0.02 + )) + ) + (create_entities (list "TestContainerSimilarCode" "Child6") + (lambda (null + ##x 1 + ##y (list "a") + ##s "s6" + ##weight 0.02 + )) + ) + + (print "1: " + (contained_entities "TestContainerSimilarCode" (list + (query_nearest_generalized_distance 2 (list "y" ) (list 1) (list 1) (list "code") (null) (null) 1 1) + )) + ) + + (print "2: " + (contained_entities "TestContainerSimilarCode" (list + (query_nearest_generalized_distance 4 (list "y" ) (list (list 1 2)) (list 1) (list "code") (null) (null) 1 1) + )) + ) + + (print "3: " + (contained_entities "TestContainerSimilarCode" (list + (query_nearest_generalized_distance 4 (list "y" ) (list (list (list 1 2))) (list 1) (list "code") (null) (null) 1 1) + )) + ) + + (print "4: " + (contained_entities "TestContainerSimilarCode" (list + (query_nearest_generalized_distance 4 (list "x" "y" ) (list 1 (list (list 1 2))) (list 1 1) (list "continuous" "code") (null) (null) 1 1) + )) + ) + + (print "5: " + (contained_entities "TestContainerSimilarCode" (list + (query_nearest_generalized_distance 3 (list "s" ) 
(list "s0") (list 1) (list "string") (null) (null) 1 1) + )) + ) + + + (print "--query_max--\n") + (print (contained_entities "TestContainerExec" (list + (query_max "x" 3) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_max "x" 3) + ))) + + (print "--query_min--\n") + (print (contained_entities "TestContainerExec" (list + (query_min "x" 2) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_min "x" 2) + ))) + + (print "--query_sum--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_sum "x") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_sum "x" "weight") + )) "\n") + + (print "--query_mode--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_mode "x") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_mode "x" "weight") + )) "\n") + + (print "--query_quantile--\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.5) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.5 "weight_eq") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.5 "weight") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.25) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.25 "weight_eq") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_quantile "x" 0.75) + )) "\n") + + (print "--query_generalized_mean--\n") + (declare (assoc mean + (compute_on_contained_entities "TestContainerExec" (list (query_generalized_mean "x" 1))) + )) + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 1) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "weight" 0) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "weight" -1) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 2) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 1 "weight") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "weight" 0 "weight") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 1 (null) mean (true) (true)) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 2 (null) mean (true)) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 3 (null) mean (false)) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_generalized_mean "x" 4 (null) mean (true)) + )) "\n") + + (print "--query_min_difference--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_min_difference "x") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_min_difference "weight") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_min_difference "weight" (null) (false)) + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_min_difference 
"weight" .3 (true)) + )) "\n") + + (print "--query_max_difference--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_max_difference "x") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_max_difference "x" 300) + )) "\n") + + (print "--query_value_masses--\n") + (print (compute_on_contained_entities "TestContainerExec" (list + (query_value_masses "x") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_value_masses "x" "weight") + )) "\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_value_masses "bar" (null) (false)) + )) "\n") + + (print "--query_less_or_equal_to--\n") + (print (contained_entities "TestContainerExec" (list + (query_less_or_equal_to "x" 3) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_less_or_equal_to "x" 3) + ))) + + (print "--query_greater_or_equal_to--\n") + (print (contained_entities "TestContainerExec" (list + (query_greater_or_equal_to "x" 4) + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_greater_or_equal_to "x" 4) + ))) + + (print "--query_within_generalized_distance--\n") + (print (contained_entities "TestContainerExec" (list + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print "--query_nearest_generalized_distance--\n") + (print (contained_entities "TestContainerExec" (list + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.5 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 0.01 1 (null) "random seed 1234" "radius") + ))) + + (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (list 2 1) (list "nominal" "cyclic") (list 1 360) (null) 0.01 1 (null) "random seed 1234" "radius") + ))) + + (print "assoc-based: " (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (associate "x" 2 "y" 1) (associate "x" "nominal" "y" "cyclic") (list 1 360) (null) 0.01 1 (null) "random seed 1234" "radius") + ))) + 
+ (print (contained_entities "TestContainerExec" (list + (query_exists "x") + (query_nearest_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (list 2 1) (list "nominal" "continuous") (list 1) (list 0.1 -0.2) 0.01 1 (null) "random seed 1234" "radius") + ))) + + (print "--contained_entities caching and permissions--\n") + + (print (assign_to_entities "TestContainerExec" (assoc !e 19)) "\n") + (print (retrieve_from_entity "TestContainerExec" "!e") "\n") + (print (retrieve_entity_root "TestContainerExec") "\n") + + (create_entities "QueryCacheTest1" (lambda + (parallel ##a 3 ) + ) ) + + (print (size (contained_entities (list + (query_equals "a" 3) + ))) "\n") + + (assign_to_entities "QueryCacheTest1" (assoc a 5)) + + (print (size (contained_entities (list + (query_equals "a" 3) + ))) "\n") + + (assign_to_entities "QueryCacheTest1" (assoc a 3)) + + (destroy_entities "QueryCacheTest1") + + (print (size (contained_entities (list + (query_equals "a" 3) + ))) "\n") + + (create_entities "QueryCacheTest2" (lambda + (parallel ##a 3 ) + ) ) + + (print (size (contained_entities (list + (query_equals "a" 3) + ))) "\n") + + (create_entities "eq_distance_test" (null)) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 0 + ##y 0 + )) + ) + + (create_entities (list "eq_distance_test" "to_delete1") + (lambda (parallel + ##x 1 + ##y 0 + )) + ) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 2 + ##y 0 + )) + ) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 3 + ##y 0 + )) + ) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 0 + ##y 1 + )) + ) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 0 + ##y 2 + )) + ) + + (create_entities (list "eq_distance_test") + (lambda (parallel + ##x 1 + ##y 1 + )) + ) + + (print + (map (lambda (retrieve_entity_root (list "eq_distance_test" (target_value 1)))) + + (contained_entities "eq_distance_test" (list + (query_within_generalized_distance 1 (list "x" "y") (list 0.0 0.0)) + ) ) + ) + + ) + + (create_entities (list "eq_distance_test" "to_delete2") + (lambda (parallel + ##x 0 + ##y 0.5 + )) + ) + + (print + (map (lambda (retrieve_entity_root (list "eq_distance_test" (target_value 1)))) + + (contained_entities "eq_distance_test" (list + (query_within_generalized_distance 1 (list "x" "y") (list 0.0 0.0)) + ) ) + ) + + ) + + (destroy_entities (list "eq_distance_test" "to_delete2")) + (destroy_entities (list "eq_distance_test" "to_delete1")) + + (print + (map (lambda (retrieve_entity_root (list "eq_distance_test" (target_value 1)))) + + (contained_entities "eq_distance_test" (list + (query_within_generalized_distance 1 (list "x" "y") (list 0.0 0.0)) + ) ) + ) + ) + + (print "--compute_on_contained_entities--\n") + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_exists "q") + ))) + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_exists "x") + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 1 1 (null) "random seed 1234" "radius") + ))) + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_within_generalized_distance 60 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 1 1 (null) "random seed 1234" "radius") + ))) + + (print (compute_on_contained_entities "TestContainerExec" (list + (query_within_generalized_distance 2 (list "x" "y") (list 0.0 0.0) (null) (null) (null) (null) 1 1 (null) "random seed 1234" "radius") + ))) + +(print 
"--compute_entity_convictions--\n") + + (create_entities "entity1" (lambda + (parallel ##alpha 3 ##b 0.17 ##c 1) + ) ) + + (create_entities "entity2" (lambda + (parallel ##alpha 4 ##b 0.12 ##c 0) + ) ) + + (create_entities "entity3" (lambda + (parallel ##alpha 5 ##b 0.1 ##c 0 ##x 16) + ) ) + + (create_entities "entity4" (lambda + (parallel ##alpha 1 ##b 0.14 ##c 1 ##x 8) + ) ) + + (create_entities "entity5" (lambda + (parallel ##alpha 9 ##b 0.11 ##c 1 ##x 32) + ) ) + + (create_entities "vert0" (lambda + (parallel ##x 0 ##y 0 ##object 1) + ) ) + + (create_entities "vert1" (lambda + (parallel ##x 1 ##y 0 ##object 1) + ) ) + + (create_entities "vert3" (lambda + (parallel ##x 0 ##y 1 ##object 1) + ) ) + + (create_entities "vert2" (lambda + (parallel ##x 1 ##y 1 ##object 1 ##vert_to_remove 1) + ) ) + + + (create_entities "vert4" (lambda + (parallel ##x 0.5 ##y 0.5 ##object 2) + ) ) + + (create_entities "vert5" (lambda + (parallel ##x 2 ##y 1 ##object 2) + ) ) + + (print "case convictions:\n" (compute_on_contained_entities (list + (query_exists "alpha") + (compute_entity_convictions 2 (list "alpha" "b" "c") (null) (null) (list 0 0 1) (null) (null) 0.5) + ))) + + (print "case convictions:\n" (compute_on_contained_entities (list + (compute_entity_convictions 2 (list "alpha" "b" "c") (null) (null) (list 0 0 1) (null) (null) 0.1) + ))) + + (print "case convictions unweighted:\n" (compute_on_contained_entities (list + (compute_entity_convictions 2 (list "alpha" "b" "c") (null) (null) (list 0 0 1) (null) (null) 2 -1 (null) 1 (null) "fast") + ))) + + (print "case convictions weighted by object (with erroneously long nominal):\n" (compute_on_contained_entities (list + (compute_entity_convictions 2 (list "x" "y") (null) (null) (list 0 0 1) (null) (null) 1 1 "object") + ))) + + (print "case convictions x exists before:\n" (compute_on_contained_entities (list + (query_exists "x") + (compute_entity_convictions 1 (list "alpha" "b" "c") (null) (null) (list 0 0 1) (null) (null) 2.0) + ))) + + (print "case convictions x exists after:\n" (compute_on_contained_entities (list + (compute_entity_convictions 1 (list "alpha" "b" "c") (null) (null) (list 0 0 1) (null) (null) 2.0) + (query_exists "x") + ))) + + (print "case convictions object = 1:\n" (compute_on_contained_entities (list + (query_equals "object" 1) + (compute_entity_convictions 2 (list "x" "y") (null) (null) (null) (null) (null) 2) + ))) + +(assign (assoc obj1_verts + (contained_entities (list + (query_equals "object" 1) + (query_equals "x" 0) + )) +)) + + (assign (assoc obj2_verts + (contained_entities (list + (query_equals "object" 2) + )) + )) + + (print "case convictions on a subset:\n" (compute_on_contained_entities (list + (query_equals "object" 1) + (compute_entity_convictions 2 (list "x" "y") obj1_verts (null) (null) (null) (null) 2 1 (null) "random seed 1234") + ))) + + (print "--compute_entity_group_kl_divergence--\n") + + (print (compute_on_contained_entities (list + (query_exists "object") + (compute_entity_group_kl_divergence 2 (list "x" "y") obj2_verts (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + )) "\n") + + (print "--compute_entity_distance_contributions--\n") + + (print (compute_on_contained_entities (list + (compute_entity_distance_contributions 2 (list "x" "y") (null) (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "(these values should match the values of the subset of these keys in the previous assoc):\n" (compute_on_contained_entities (list + (compute_entity_distance_contributions 2 
(list "x" "y") (list "vert0" "vert1" "vert2") (null) (null) (null) (null) 2 -1) + ))) + + (print (compute_on_contained_entities (list + (query_exists "object") + (compute_entity_distance_contributions 2 (list "x" "y") (null) (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "(these values should match the values of the subset of these keys in the previous assoc):\n" (compute_on_contained_entities (list + (query_exists "object") + (compute_entity_distance_contributions 2 (list "x" "y") obj2_verts (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "--compute_entity_kl_divergences--\n") + + (print (compute_on_contained_entities (list + (query_exists "object") + (compute_entity_kl_divergences 2 (list "x" "y") (null) (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "(these values should match the values of the subset of these keys in the previous assoc):" (compute_on_contained_entities (list + (query_exists "object") + (compute_entity_kl_divergences 2 (list "x" "y") obj1_verts (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "additional conviction calculations:\n" ) + + (print "case conviction:" (compute_on_contained_entities (list + (query_exists "object") + (query_not_equals + "x" 0.5 + ) + (query_not_exists "vert_to_remove") + (compute_entity_convictions 1 (list "x" "y") (null) (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "case conviction:" (compute_on_contained_entities (list + (query_exists "object") + (query_not_equals + "x" 0.5 + ) + (query_not_exists "vert_to_remove") + (compute_entity_convictions 2 (list "x" "y") (null) (null) (null) (null) (null) 2 -1 (null) "random seed 1234") + ))) + + (print "cyclic feature nearest neighbors: " ) + + (create_entities "cyclic1" (lambda + (parallel ##is_cyclic 1 ##rank_c 5 ##degree_c 0) + ) ) + + (create_entities "cyclic2" (lambda + (parallel ##is_cyclic 1 ##rank_c 4 ##degree_c 90) + ) ) + + (create_entities "cyclic3" (lambda + (parallel ##is_cyclic 1 ##rank_c 3 ##degree_c 180) + ) ) + + (create_entities "cyclic4" (lambda + (parallel ##is_cyclic 1 ##rank_c 6 ##degree_c 270) + ) ) + + (create_entities "cyclic5" (lambda + (parallel ##is_cyclic 1 ##rank_c 4.5 ##degree_c 360) + ) ) + + (print (compute_on_contained_entities (list + (query_exists "is_cyclic") + (query_nearest_generalized_distance 2 (list "rank_c" "degree_c") (list 4 0) (null) (list "continuous" "cyclic") (list (null) 360) (null) 1 1 (null) "random seed 1234" (null)) + ))) + + (create_entities "CyclicTestEntity" (null)) + (create_entities (list "CyclicTestEntity" "10" ) (lambda (null ##deg 177))) + (create_entities (list "CyclicTestEntity" "20" ) (lambda (null ##deg 103))) + (create_entities (list "CyclicTestEntity" "30" ) (lambda (null ##deg 83))) + (create_entities (list "CyclicTestEntity" "40" ) (lambda (null ##deg 294))) + (create_entities (list "CyclicTestEntity" "50" ) (lambda (null ##deg 80))) + (create_entities (list "CyclicTestEntity" "60" ) (lambda (null ##deg 320))) + (create_entities (list "CyclicTestEntity" "70" ) (lambda (null ##deg 90))) + (create_entities (list "CyclicTestEntity" "80" ) (lambda (null ##deg 300))) + (create_entities (list "CyclicTestEntity" "90" ) (lambda (null ##deg 40))) + (create_entities (list "CyclicTestEntity" "100") (lambda (null ##deg 15))) + (create_entities (list "CyclicTestEntity" "110") (lambda (null ##deg 50))) + (create_entities (list "CyclicTestEntity" "120") (lambda (null ##deg 170))) + (create_entities 
(list "CyclicTestEntity" "130") (lambda (null ##deg 175))) + (create_entities (list "CyclicTestEntity" "140") (lambda (null ##deg 165))) + (create_entities (list "CyclicTestEntity" "150") (lambda (null ##deg 270))) + (create_entities (list "CyclicTestEntity" "155") (lambda (null ##deg 0))) + (create_entities (list "CyclicTestEntity" "160") (lambda (null ##deg 313))) + (create_entities (list "CyclicTestEntity" "170") (lambda (null ##deg 120))) + (create_entities (list "CyclicTestEntity" "180") (lambda (null ##deg 213))) + (create_entities (list "CyclicTestEntity" "190") (lambda (null ##deg 12))) + (create_entities (list "CyclicTestEntity" "200") (lambda (null ##deg 8))) + + (declare (assoc + buds + (compute_on_contained_entities "CyclicTestEntity" (list + (query_nearest_generalized_distance + 3 ; K + (list "deg") + (list 350) + (null) ; weights + (list "cyclic") ; types + (list 360) ; attributes + (null); deviations + 1 ; p + -1 ; dwe + (null) ; weight + (rand) + ) + )) + )) + + (print "cyclic test expected: 155, 200, 190 ... deg values of 0 8 and 12:\n") + (map (lambda (print (target_index) ": " (target_value) " " (retrieve_entity_root (list "CyclicTestEntity" (target_index 1))))) buds) + + (print "--contains_label--\n") + (print (contains_label "label3") "\n") + (print (contains_label "hhccc") "\n") + (print (contains_label (list "TestContainerExec" "Child1") "y") "\n") + (print (contains_label (list "TestContainerExec" "Child1") "qq") "\n") + + (print "--assign_to_entities--\n") + + #a2e .nan + (assign_to_entities (assoc a2e (null))) + (print a2e) + + (assign_to_entities (assoc label3 " or ")) + (print (unparse (retrieve_from_entity "label3") (true) (true))) + (print (unparse (retrieve_from_entity "MyNewLibrary" "three") (true) (true))) + (print (unparse (assign_to_entities "MyNewLibrary" (associate "three" 5)) (true) (true))) + (print (unparse (retrieve_from_entity "MyNewLibrary" "three") (true) (true))) + + (print (unparse (lambda (parallel #asgn_test 12) ) ) "\n") + (print (unparse (assign_to_entities (assoc "asgn_test" 4)) (true) (true))) + (print (unparse (retrieve_from_entity "asgn_test") (true) (true))) + + (create_entities "MultipleSetTest" (lambda (parallel ##a 1 ##b 2 ##c 3))) + (assign_to_entities "MultipleSetTest" (assoc a 2 b 3 c 4) "MyNewLibrary" (assoc "three" 12)) + (print (retrieve_entity_root "MultipleSetTest")) + (print (retrieve_entity_root "MyNewLibrary")) + + (print "--direct_assign_to_entities--\n") + (create_entities "DRFE" (lambda (parallel ##a 12)) ) + (print (unparse (direct_retrieve_from_entity "DRFE" "a") (true) (true))) + (print (unparse (direct_assign_to_entities "DRFE" (assoc a 7)) (true) (true))) + (print (unparse (direct_retrieve_from_entity "DRFE" "a") (true) (true))) + + (print "--accum_to_entities--\n") + + #i2e 4 + (accum_to_entities (assoc i2e 3)) + (print i2e "\n") + + #i2e2 (list 1 2 3) + (accum_to_entities (assoc i2e2 4)) + (print i2e2 "\n") + + (print "--retrieve_from_entity--\n") + (assign_to_entities (assoc label3 " and ")) + (print (unparse (retrieve_from_entity "label3") (true) (true))) + (print (unparse (retrieve_from_entity "MyNewLibrary" "three") (true) (true))) + + (create_entities "RCT" (lambda (parallel ##a 12 ##b 13)) ) + (print (unparse (retrieve_from_entity "RCT" "a") (true) (true))) + (print (unparse (retrieve_from_entity "RCT" (list "a" "b") ) (true) (true))) + (print (unparse (retrieve_from_entity "RCT" (zip (list "a" "b") null) ) (true) (true))) + + (print "--direct_retrieve_from_entity--\n") + (print (unparse 
(retrieve_from_entity "DRFE" "a") (true) (true))) + (print (unparse (direct_retrieve_from_entity "DRFE" "a") (true) (true))) + + (print "--call_entity--\n") + (print (contained_entities "TestContainerExec")) + + (print (unparse (retrieve_entity_root "TestContainerExec") (true) (true))) + (print (unparse (call_entity "TestContainerExec" "^a") (true) (true))) + (print (unparse (call_entity "TestContainerExec" "b") (true) (true))) + (print (unparse (call_entity "TestContainerExec" "c" (assoc x 5)) (true) (true))) + + (print "--call_entity_get_changes--\n") + (create_entities "CEGCTest" (lambda + (parallel ##a_assign + (seq + (create_entities "Contained" (lambda + (null ##a 4 ##b 6 ) + ) ) + (print (unparse (retrieve_from_entity "Contained" "a") (true) (true))) + (assign_to_entities "Contained" (assoc a 6 b 10) ) + (print (unparse (retrieve_from_entity "Contained" "a") (true) (true))) + (set_entity_rand_seed "Contained" "bbbb") + (destroy_entities "Contained") + ) + ) + ) ) + + (print (call_entity_get_changes "CEGCTest" "a_assign")) + + (print "--call_container--\n") + (print (call_entity "TestContainerExec" "d" (assoc x 5)) "\n") + (print (call_entity "TestContainerExec" "d" (assoc x 5) 30 30) "\n") + (print (call_entity "TestContainerExec" "d" (assoc x 5) 1 1) "\n") + + (print "--circular, repeated, and preevaluated references--\n") + + (print (associate "a" 1 "b" 2)) + (print (lambda (associate "a" 1 "b" 2))) + + (print (get (lambda (assoc "a" 1 "b" 2)) 0) "\n") + (print (get (lambda (assoc "a" 1 "b" 2)) "a") "\n") + (print (get (lambda (list "a" 1 "b" 2)) "a") "\n") + + (print (replace + (list (associate "a" 1)) + (list 2) 1 + (list 1) (lambda (get (target) 0)))) + + (print + (list (associate "a" 1) + @(get (target 0) 0) 1 + ) + ) + + (print "--nan equality tests--\n") + + (print "(= (null) .nan): " (= (null) .nan) "\n") + (print "(= (+ (null)) .nan): " (= (+ (null)) .nan) "\n") + + (create_entities (list "NaNTest") (null)) + + (create_entities (list "NaNTest" "Entity3") + (lambda + (null + ##label1 3 + ##label2 1 + ) + ) + ) + + (create_entities (list "NaNTest" "EntityNull") + (lambda + (null + ##label1 (null) + ##label2 1 + ) + ) + ) + + (create_entities (list "NaNTest" "EntityNaN") + (lambda + (null + ##label1 .nan + ##label2 1 + ) + ) + ) + + (print (contained_entities "NaNTest" (list + (query_equals "label1" 3) + (query_exists "label2") + )) + ) + + (print (contained_entities "NaNTest" (list + (query_equals "label1" .nan) + (query_exists "label2") + )) + ) + + (print (contained_entities "NaNTest" (list + (query_equals "label1" (null)) + (query_exists "label2") + )) + ) + + (print (contained_entities "NaNTest" (list + (query_nearest_generalized_distance 3 (list "label1") (list 0.0) (null) (null) (null) (null) 1) + )) + ) + + (print "--combo tests--\n") + + (print (call (set_type + (list 1 0.5 "3") + (get_comments + (lambda + ;+ + (null) + ) + ) + )) "\n") + + ;compute distance between two vectors + (print + (pow + (reduce + (lambda + (+ (target_value 1) (target_value)) + ) + (map + (lambda + (pow + (- (get (target_value) 0) (get (target_value) 1)) + 2 + ) + ) + (list 3 4) + (list 0 0) + ) + ) + 0.5 + ) + "\n" + ) + + ;label combining + (create_entities "dummy_container2" (lambda (null ##A 10 ##duped (list "b")))) + + (assign_entity_roots "dummy_container2" + (set_type + (append + (values (retrieve_entity_root "dummy_container2" 1)) + (list (set_labels (list "a") (list "duped"))) + ) + (null) + ) + ) + + (print (retrieve_entity_root "dummy_container2" 1) "\n") + + ;combo query + + 
(create_entities "combo_query" (null)) + (create_entities (list "combo_query" "world") (lambda (null ##A 6 ##B 7 ##C 9 ##D 1))) + (create_entities (list "combo_query" "hello") (lambda (null ##A 6 ##B 7 ##C 19 ##D 1 ##E 2 ##F 3))) + (create_entities (list "combo_query" "!") (lambda (null ##A 6 ##B 7 ##C 19 ##D 1 ##E 2 ##F 3))) + + (print + (contained_entities "combo_query" + (list + (query_exists "A") (query_greater_or_equal_to "B" 5) (query_exists "B") (query_greater_or_equal_to "C" 18) + ) + ) + ) + + (create_entities "nan_queries" (null)) + (create_entities (list "nan_queries" "a1") (lambda (null ##A 10 ##B (null)))) + (create_entities (list "nan_queries" "a2") (lambda (null ##A 11 ##B 2))) + (create_entities (list "nan_queries" "a3") (lambda (null ##A (null) ##B 1))) + + + ;expected output is 3 neighbors in order: a1, a2, a3 + (print + (compute_on_contained_entities "nan_queries" (list + (query_nearest_generalized_distance + 3 ;k-value + (list "A" "B") + (list 9 2) + (null) + (null) + (null) + (null) + 2 ;p-value + ) + )) + ) + + ;expected output is 2 neighbors in order: a1, a2 + (print + (compute_on_contained_entities "nan_queries" (list + (query_nearest_generalized_distance + 2 ;k-value + (list "A" "B") + (list 9 2) + (null) + (null) + (null) + (null) + 2 ;p-value + ) + )) + ) + + ;expected output is only 1 neighbor + (print + (compute_on_contained_entities "nan_queries" (list + (query_nearest_generalized_distance + 1 ;k-value + (list "A" "B") + (list 9 2) + (null) + (null) + (null) + (null) + 2 ;p-value + ) + )) + ) + + ;expected output is 3 neighbors in order: a2, a1/a3 + (print + (compute_on_contained_entities "nan_queries" (list + (query_nearest_generalized_distance + 3 ;k-value + (list "A" "B") ;labels + (list 9 2) ;values + (null) ;weights + (list "continuous" "continuous") ;distance types + (list (list 5 6) (list 5 5)) ;attributes + (null) + 2 ;p-value + ) + )) + ) + + (create_entities (list "nan_queries" "a4") (lambda (null ##A (null) ##B (null)))) + + ;expected output is 3 neighbors in order: a4, a1/a3 + (print + (compute_on_contained_entities "nan_queries" (list + (query_nearest_generalized_distance + 3 ;k-value + (list "A" "B") ;labels + (list (null) (null)) ;values + (null) ;weights + (list "continuous" "continuous") ;distance types + (list (list 1 0) (list 1 0)) ;attributes + (null) + 2 ;p-value + ) + )) + ) + + (print "--accuracy tests--\n") + (print (generalized_distance + ;weights + (list 2.5 10 5 0.033333333 3.333333333 20) + ;types + (list "nominal" "nominal" "nominal" "continuous" "nominal" "continuous") + ;attributes + (list 3 100 7 0 10 0) + ;deviations + (list 0.4 0.1 0.2 30 0.3 0.05) + ;p + 0.5 + (list 1 2 1 100 1 50) + (list 1 1 1 120 1 50.1) + ) "\n") + + (print "expected: 256.5114466\n") + + (print (generalized_distance + ;weights + (list 2.5 10 5 0.033333333 3.333333333 20) + ;types + (list "nominal" "nominal" "nominal" "continuous" "nominal" "continuous") + ;attributes + (list 3 100 7 0 10 0) + ;deviations + (list 0.4 0.1 0.2 30 0.3 0.05) + ;p + 1 + ;point 1 + (list 1 2 1 100 1 50) + ;point 2 + (list 1 1 1 120 1 50.1) + ) "\n") + + (print "expected: 8.037178684\n") + + (print (generalized_distance + ;weights + (list 2.5 10 5 0.033333333 3.333333333 20) + ;types + (list "nominal" "nominal" "nominal" "continuous" "nominal" "continuous") + ;attributes + (list 3 100 7 0 10 0) + ;deviations + (list 0.4 0.1 0.2 30 0.3 0.05) + ;p + 0 + ;point 1 + (list 1 2 1 100 1 50) + ;point 2 + (list 1 1 1 120 1 50.1) + ) "\n") + + (print "expected: 0.14362593\n") + + 
(create_entities "DistanceTestContainer" + (lambda (null)) + ) + + ;distance of 256.51144654 + (create_entities (list "DistanceTestContainer" "point1") + (lambda (null + ##a 1 + ##b 1 + ##c 1 + ##d 120 + ##e 1 + ##f 50.1 + )) + ) + + ;distance of 273.1247383 + (create_entities (list "DistanceTestContainer" "point2") + (lambda (null + ##a 2 + ##b 1 + ##c 1 + ##d 120 + ##e 1 + ##f 50.1 + )) + ) + + ;distance of 256.51144654 + (create_entities (list "DistanceTestContainer" "point3") + (lambda (null + ##a 1 + ##b 1 + ##c 1 + ##d 120 + ##e 1 + ##f 50.1 + )) + ) + + ;distance of 936.92716429 + (create_entities (list "DistanceTestContainer" "point4") + (lambda (null + ##a 2 + ##b 1 + ##c 2 + ##d 160 + ##e 2 + ##f 51 + )) + ) + + ;distance of 330.9586550 + (create_entities (list "DistanceTestContainer" "point5") + (lambda (null + ##a 1 + ##b 1 + ##c 1 + ##d 119 + ##e 1 + ##f 49.8 + )) + ) + + (map (lambda + ;distance of 330.9586550 + (create_entities (list "DistanceTestContainer") + (lambda (null + ##a 1 + ##b 1 + ##c 1 + ##d 119 + ##e 1 + ##f 49.8 + )) + ) + ) + (range 0 1000) + ) + + (print (compute_on_contained_entities "DistanceTestContainer" (list + (query_exists "a") + (query_nearest_generalized_distance + ;k + 3 + ;features + (list "a" "b" "c" "d" "e" "f") + ;center values + (list 1 2 1 100 1 50) + ;weights + (list 2.5 10 5 0.033333333 3.333333333 20) + ;types + (list "nominal" "nominal" "nominal" "continuous" "nominal" "continuous") + ;attributes + (list 3 100 7 0 10 0) + ;deviations + (list 0.4 0.1 0.2 30 0.3 0.05) + ;p + 0.5 + ;dwe + 1 + (null) ; weight feature + "random seed 1234" + ) + ))) + + (print "distance symmetry tests\n") + (create_entities "DistanceSymmetryContainer" (null) ) + + (create_entities (list "DistanceSymmetryContainer" "A") (lambda (null + ##A 4 + ##B 8 + ) )) + (create_entities (list "DistanceSymmetryContainer" "B") (lambda (null + ##A 4 + ##B 9 + ) )) + (create_entities (list "DistanceSymmetryContainer" "C") (lambda (null + ##A 4 + ##B 7 + ) )) + (create_entities (list "DistanceSymmetryContainer" "D") (lambda (null + ##A 4 + ##B 10 + ) )) + (create_entities (list "DistanceSymmetryContainer" "E") (lambda (null + ##A 7 + ##B 8 + ) )) + (create_entities (list "DistanceSymmetryContainer" "F") (lambda (null + ##A 7 + ##B 9 + ) )) + (create_entities (list "DistanceSymmetryContainer" "G") (lambda (null + ##A 7 + ##B 7 + ) )) + (create_entities (list "DistanceSymmetryContainer" "H") (lambda (null + ##A 10 + ##B 8 + ) )) + (create_entities (list "DistanceSymmetryContainer" "I") (lambda (null + ##A 10 + ##B 9 + ) )) + (create_entities (list "DistanceSymmetryContainer" "J") (lambda (null + ##A 10 + ##B 10 + ) )) + (print + + (compute_on_contained_entities "DistanceSymmetryContainer" (list + (query_nearest_generalized_distance + 8 ; k + (list "A" "B") + (list 4 9) + (null) ; context_weights + (list "nominal" "nominal") ; types + (list 1 1) ; attributes + (null) ; context_deviations + 0.1 ; p_parameter + 1 ; dwe = 1 means return computed distance to each case + (null) ; weight + (rand) + (null) + "precise" + (true) + ) + )) + (compute_on_contained_entities "DistanceSymmetryContainer" (list + (query_nearest_generalized_distance + 8 ; k + (list "B" "A") + (list 9 4) + (null) ; context_weights + (list "nominal" "nominal") ; types + (list 1 1) ; attributes + (null) ; context_deviations + 0.1 ; p_parameter + 1 ; dwe = 1 means return computed distance to each case + (null) ; weight + (rand) + (null) + "precise" + (true) + ) + )) + ) + + (create_entities "BoxConvictionTestContainer" 
(null) ) + + (create_entities (list "BoxConvictionTestContainer" "vert0") (lambda + (null ##x 0 ##y 0 ##weight 2) + ) ) + + (create_entities (list "BoxConvictionTestContainer" "vert1") (lambda + (null ##x 0 ##y 1 ##weight 1) + ) ) + + (create_entities (list "BoxConvictionTestContainer" "vert2") (lambda + (null ##x 1 ##y 0 ##weight 1) + ) ) + + (create_entities (list "BoxConvictionTestContainer" "vert3") (lambda + (null ##x 2 ##y 1 ##weight 1) + ) ) + + (print "distance contributions\n") + (print "dc: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_distance_contributions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "weighted dc: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_distance_contributions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise") + ))) + + (print "removal conviction\n") + + (print "kl: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "weighted kl: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "further parameterized convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (list "vert0" "vert1" "vert2" "vert3") (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "weighted convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (true)) + ))) + + (print "group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert1") (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (true)) + )) "\n") + + (print "weighted group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert1") (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (true)) + )) "\n") + + (print "addition conviction\n") + + (print "kl: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (false)) + ))) + + (print "weighted kl: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (false)) + ))) + + (print "convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 
-1 (null) "fixed_seed" (null) "recompute_precise" (false)) + ))) + + (print "further parameterized convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (list "vert0" "vert1" "vert2" "vert3") (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (false)) + ))) + + (print "weighted convictions: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (false)) + ))) + + (print "group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert1") (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise" (false)) + )) "\n") + + (print "weighted group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert1") (null) (null) (null) (null) 2.0 -1 "weight" "fixed_seed" (null) "recompute_precise" (false)) + )) "\n") + + (print "adding a case\n") + + (create_entities (list "BoxConvictionTestContainer" "vert4") (lambda + (null ##x 3 ##y 0) + ) ) + + (print "noncyclic KL: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise") + ))) + + (print "noncyclic group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert4") (null) (null) (null) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise") + )) "\n") + + (print "cyclic KL: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_kl_divergences 1 (list "x" "y") (null) (null) (list "cyclic" "continuous") (list 3.5 (null)) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise") + ))) + + (print "cyclic conviction: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_convictions 1 (list "x" "y") (null) (null) (list "cyclic" "continuous") (list 3.5 (null)) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise") + ))) + + (print "cyclic group kl divergence: " (compute_on_contained_entities "BoxConvictionTestContainer" (list + (compute_entity_group_kl_divergence 1 (list "x" "y") (list "vert4") (null) (list "cyclic" "continuous") (list 3.5 (null)) (null) 2.0 -1 (null) "fixed_seed" (null) "recompute_precise") + )) "\n") + + (print "surprisal transforms\n") + + (create_entities "SurprisalTransformContainer" (null) ) + + (create_entities (list "SurprisalTransformContainer" "vert0") (lambda + (null ##x 3 ##weight 2) + ) ) + + (create_entities (list "SurprisalTransformContainer" "vert1") (lambda + (null ##x 3 ##weight 0) + ) ) + + (create_entities (list "SurprisalTransformContainer" "vert2") (lambda + (null ##x 4 ##weight 1) + ) ) + + (create_entities (list "SurprisalTransformContainer" "vert3") (lambda + (null ##x 5 ##weight 1) + ) ) + + ;should be: + ;(list "vert0" "vert1" "vert2" "vert3") + ;(list 0.049787068367863944 0.049787068367863944 0.01831563888873418 0.006737946999085467) + (print "probabilities: " + (compute_on_contained_entities "SurprisalTransformContainer" (list + (query_nearest_generalized_distance + 4 ; k + (list "x") + (list 0) + (null) ; context_weights + (list 
"continuous") ; types + (null) ; attributes + (null) ; context_deviations + 1 ; p_parameter + "surprisal_to_prob" ; dwe = 1 means return computed distance to each case + (null) ; weight + (rand) + (null) + "precise" + (true) + ) + )) + "\n" ) + + ;should be + ;(list "vert0" "vert2" "vert3" "vert1") + ;(list 0.09709538455906153 0.01831563888873418 0.006737946999085467 0) + (print "weighted probabilities: " + (compute_on_contained_entities "SurprisalTransformContainer" (list + (query_nearest_generalized_distance + 4 ; k + (list "x") + (list 0) + (null) ; context_weights + (list "continuous") ; types + (null) ; attributes + (null) ; context_deviations + 1 ; p_parameter + "surprisal_to_prob" ; dwe = 1 means return computed distance to each case + "weight" ; weight + (rand) + (null) + "precise" + (true) + ) + )) + "\n" ) + + (create_entities (list "SurprisalTransformContainer" "testvert") (lambda + (null ##x 0 ##weight 1) + ) ) + + ;should be approx 2.123 + (print "surprisal contribution: " (compute_on_contained_entities "SurprisalTransformContainer" (list + (compute_entity_distance_contributions 4 (list "x") (list "testvert") (null) (null) (null) (null) 1 "surprisal_to_prob" (null) "fixed_seed" (null) "precise") + ))) + + ;should be approx 2.123 + (print "weighted surprisal contribution: " (compute_on_contained_entities "SurprisalTransformContainer" (list + (compute_entity_distance_contributions 4 (list "x") (list "testvert") (null) (null) (null) (null) 1 "surprisal_to_prob" "weight" "fixed_seed" (null) "precise") + ))) + + (print "--concurrency tests--\n") + + ||(parallel (print "hello\n") (print "hello\n")) + (print "+ : " ||(+ 1 2 3 4 5 6 7 8 9) "\n") + (print "- : " ||(- 45 1 2 3 4 5 6 7 8 9) "\n") + (print "* : " ||(* 1 2 3 4) "\n") + (print "/ : " ||(/ 24 1 2 3 4) "\n") + (print "mod : " ||(mod 7 3) "\n") + (print "max : " ||(max 1 2 3 4 5 6 7 8 9 10) "\n") + (print "min : " ||(min 1 2 3 4 5 6 7 8 9 10) "\n") + (print "and : " ||(and (true) (true) (true) (true)) "\n") + (print "or : " ||(or (false) (false) (true) (false)) "\n") + (print "xor : " ||(xor (true) (false)) "\n") + (print "= : " ||(= 1 1 1 2) "\n") + (print "!= : " ||(!= 1 1 1 2) "\n") + (print "< : " ||(< 1 1 1 2) "\n") + (print "<= : " ||(<= 1 1 1 2) "\n") + (print "> : " ||(> 1 1 1 2) "\n") + (print ">= : " ||(>= 1 1 1 2) "\n") + (print "~ : " ||(~ 1 1 1 2) "\n") + (print "list : " ||(list (+ 1 0) 1 1 2) "\n") + (print "associate : " ||(associate "a" 1 "b" 1 "c" 1 "d" 2) "\n") + (print "assoc : " ||(assoc a (+ 1 0) b 1 c 1 d 2) "\n") + (print "map list : " ||(map (lambda (* (target_value) 2)) (list 1 2 3 4)) "\n") + (print "map assoc : " ||(map (lambda (* (target_value) 2)) (associate "a" 1 "b" 2 "c" 3 "d" 4)) "\n") + (print "filter list : " ||(filter (lambda (> (target_value) 2)) (list 1 2 3 4)) "\n") + (print "filter assoc : " ||(filter (lambda (< (target_index) 20)) (associate 10 1 20 2 30 3 40 4)) "\n") + (print "filter assoc 2 : " ||(filter (lambda (<= (target_value) 2)) (associate 10 1 20 2 30 3 40 4)) "\n") + + ;nested concurrency + ||(map + (lambda + (let (assoc index (target_value 1)) + ||(map + (lambda (+ (target_index))) + (range 1 100) + ) + ) + ) + (range 1 100) + ) + + ;writing outside of concurrency + (let (assoc x (list)) + ||(map (lambda + (let (assoc y (target_value 1) ) + (accum "x" (null) y) + ) + ) + (range 1 1000) + ) + + (print "Expecting 1000: " (size x) "\n") + ) + + (print "--concurrent entity writes--\n") + #concurrent_ent_writes (list) + ||(map (lambda + (accum_to_entities (assoc 
concurrent_ent_writes (list (target_value 2)))) + ) + (range 1 1000) + ) + ;make sure the lists match up and none were lost + (print "concurrent entity writes successful: " (= (range 1 1000) (sort concurrent_ent_writes)) "\n") + + (print "--total execution time--\n") + (print (- (system_time) start_time) "\n") +) diff --git a/src/Amalgam/amlg_code/module_test.amlg b/src/Amalgam/amlg_code/module_test.amlg new file mode 100644 index 00000000..5ec312b6 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test.amlg @@ -0,0 +1,8 @@ +(assoc + a #a 1 + hello #hello (print "hello\n") + b #b (true) + c #c 0.1 + d #d 100000000 +) + diff --git a/src/Amalgam/amlg_code/module_test.json b/src/Amalgam/amlg_code/module_test.json new file mode 100644 index 00000000..585086bb --- /dev/null +++ b/src/Amalgam/amlg_code/module_test.json @@ -0,0 +1 @@ +[{"b":4,"a":3},{"c":"c","d":null}] \ No newline at end of file diff --git a/src/Amalgam/amlg_code/module_test.yaml b/src/Amalgam/amlg_code/module_test.yaml new file mode 100644 index 00000000..66dfd2e9 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test.yaml @@ -0,0 +1,5 @@ +--- +a: 123 +b: "ABC" +c: 123.45 +d: true \ No newline at end of file diff --git a/src/Amalgam/amlg_code/module_test/submoduletest.amlg b/src/Amalgam/amlg_code/module_test/submoduletest.amlg new file mode 100644 index 00000000..6e363397 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test/submoduletest.amlg @@ -0,0 +1,6 @@ +(parallel + #a 7 + (#hello print + "sub hello\n" + ) +) diff --git a/src/Amalgam/amlg_code/module_test/submoduletest2.amlg b/src/Amalgam/amlg_code/module_test/submoduletest2.amlg new file mode 100644 index 00000000..b5260ed7 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test/submoduletest2.amlg @@ -0,0 +1,6 @@ +(parallel + #a 8 + (#hello print + "sub hello 2\n" + ) +) diff --git a/src/Amalgam/amlg_code/module_test2.amlg b/src/Amalgam/amlg_code/module_test2.amlg new file mode 100644 index 00000000..00a38d62 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test2.amlg @@ -0,0 +1,8 @@ +(assoc + hello #hello + (print "hello\n") + b #b (true) + a #a 1 + c #c 0.1 + d #d 100000000 +) diff --git a/src/Amalgam/amlg_code/module_test2/submoduletest.amlg b/src/Amalgam/amlg_code/module_test2/submoduletest.amlg new file mode 100644 index 00000000..025683ac --- /dev/null +++ b/src/Amalgam/amlg_code/module_test2/submoduletest.amlg @@ -0,0 +1,5 @@ +(parallel + #a 7 + #hello + (print "sub hello\n") +) diff --git a/src/Amalgam/amlg_code/module_test2/submoduletest2.amlg b/src/Amalgam/amlg_code/module_test2/submoduletest2.amlg new file mode 100644 index 00000000..d8efc524 --- /dev/null +++ b/src/Amalgam/amlg_code/module_test2/submoduletest2.amlg @@ -0,0 +1,5 @@ +(parallel + #a 8 + #hello + (print "sub hello 2\n") +) diff --git a/src/Amalgam/amlg_code/persist_module_test.amlg b/src/Amalgam/amlg_code/persist_module_test.amlg new file mode 100644 index 00000000..b4333df6 --- /dev/null +++ b/src/Amalgam/amlg_code/persist_module_test.amlg @@ -0,0 +1,5 @@ +(parallel + #a 1 + #hello + (print "hello\n") +) diff --git a/src/Amalgam/amlg_code/persist_module_test/psm.amlg b/src/Amalgam/amlg_code/persist_module_test/psm.amlg new file mode 100644 index 00000000..485d2c81 --- /dev/null +++ b/src/Amalgam/amlg_code/persist_module_test/psm.amlg @@ -0,0 +1,5 @@ +(parallel + #a 8 + #hello + (print "hello from psm\n") +) diff --git a/src/Amalgam/amlg_code/persistent_tree_test_inter.amlg b/src/Amalgam/amlg_code/persistent_tree_test_inter.amlg new file mode 100644 index 00000000..50e60977 --- /dev/null +++ 
b/src/Amalgam/amlg_code/persistent_tree_test_inter.amlg @@ -0,0 +1,6 @@ +(parallel + #c 3 + (#chello print + "chello\n" + ) +) \ No newline at end of file diff --git a/src/Amalgam/amlg_code/persistent_tree_test_leaf.amlg b/src/Amalgam/amlg_code/persistent_tree_test_leaf.amlg new file mode 100644 index 00000000..a846389c --- /dev/null +++ b/src/Amalgam/amlg_code/persistent_tree_test_leaf.amlg @@ -0,0 +1,5 @@ +(parallel + #f 6 + #jello + (print "jello\n") +) diff --git a/src/Amalgam/amlg_code/persistent_tree_test_root.amlg b/src/Amalgam/amlg_code/persistent_tree_test_root.amlg new file mode 100644 index 00000000..bfc1142f --- /dev/null +++ b/src/Amalgam/amlg_code/persistent_tree_test_root.amlg @@ -0,0 +1,12 @@ +(parallel + #b 2 + (#yello print + "yello\n" + ) + + #kill_inter + (destroy_entities "PersistTreeInter") + + #clean_backup + (destroy_entities "leaf_backup") +) \ No newline at end of file diff --git a/src/Amalgam/amlg_code/repl.amlg b/src/Amalgam/amlg_code/repl.amlg new file mode 100644 index 00000000..4401ed7f --- /dev/null +++ b/src/Amalgam/amlg_code/repl.amlg @@ -0,0 +1,13 @@ +(while 1 + (system "printline" "> ") + ;(print (call (parse (system "readline"))) "\n") + (let (assoc input (system "readline")) + (if + (= "(" (first (explode input)) ) + (print (call (parse input)) "\n") + (print (call + (parse (concat "(" input ")")) + ) "\n") + ) + ) +) diff --git a/src/Amalgam/amlg_code/store_test.amlg b/src/Amalgam/amlg_code/store_test.amlg new file mode 100644 index 00000000..e20ee0dc --- /dev/null +++ b/src/Amalgam/amlg_code/store_test.amlg @@ -0,0 +1 @@ +(list 1 2 3 4) diff --git a/src/Amalgam/amlg_code/test.amlg b/src/Amalgam/amlg_code/test.amlg new file mode 100644 index 00000000..2847c612 --- /dev/null +++ b/src/Amalgam/amlg_code/test.amlg @@ -0,0 +1,4 @@ +(seq + ;(print (format (list (assoc a 3 b 4) (assoc c "c" d (null))) "code" "yaml") "\n") + (print (format (true) "code" "yaml") "\n") +) \ No newline at end of file diff --git a/src/Amalgam/entity/Entity.cpp b/src/Amalgam/entity/Entity.cpp new file mode 100644 index 00000000..f0e39135 --- /dev/null +++ b/src/Amalgam/entity/Entity.cpp @@ -0,0 +1,1065 @@ +//project headers: +#include "Entity.h" +#include "AssetManager.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeFunctions.h" + +std::vector Entity::emptyContainedEntities; + +Entity::Entity() +{ + hasContainedEntities = false; + entityRelationships.container = nullptr; + + SetRoot(nullptr, false); + + idStringId = StringInternPool::NOT_A_STRING_ID; +} + +Entity::Entity(Entity *_container, std::string &code_string, const std::string &rand_state, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier) + : randomStream(rand_state) +{ + hasContainedEntities = false; + entityRelationships.container = _container; + + SetRoot(code_string, metadata_modifier); + + idStringId = StringInternPool::NOT_A_STRING_ID; +} + +Entity::Entity(Entity *_container, EvaluableNode *_root, const std::string &rand_state, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier) + : randomStream(rand_state) +{ + hasContainedEntities = false; + entityRelationships.container = _container; + + //since this is the constructor, can't have had this entity's EntityNodeManager + SetRoot(_root, false, metadata_modifier); + + idStringId = StringInternPool::NOT_A_STRING_ID; +} + +Entity::Entity(Entity *t) +{ + //start with an empty entity to make sure SetRoot works 
fine + randomStream = t->randomStream; + hasContainedEntities = false; + entityRelationships.container = nullptr; + + SetRoot(t->evaluableNodeManager.GetRootNode(), false); + + idStringId = StringInternPool::NOT_A_STRING_ID; + + hasContainedEntities = t->hasContainedEntities; + + if(hasContainedEntities) + { + entityRelationships.relationships = new EntityRelationships(); + + auto &t_contained_entities = t->GetContainedEntities(); + + //copy all contained entities + entityRelationships.relationships->containedEntities.reserve(t_contained_entities.size()); + for(Entity *e : t_contained_entities) + { + Entity *child_copy = new Entity(e); + AddContainedEntity(child_copy, e->GetIdStringId()); + } + + entityRelationships.relationships->container = nullptr; + } + else + { + entityRelationships.container = nullptr; + } +} + +Entity::~Entity() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(mutex); +#endif + + Entity *container = GetContainer(); + size_t last_index_of_container = 0; + if(container != nullptr) + last_index_of_container = container->GetNumContainedEntities() - 1; //if there's no contained entities, then it wouldn't have a container! + + EntityQueryManager::RemoveEntity(container, this, GetEntityIndexOfContainer(), last_index_of_container); + + if(hasContainedEntities) + { + //delete the entities from highest index to lowest index to reduce churn when freeing the query caches + auto &contained_entities = entityRelationships.relationships->containedEntities; + for(size_t i = contained_entities.size(); i > 0; i--) + { + size_t index = i - 1; + delete contained_entities[index]; + } + + delete entityRelationships.relationships; + } + + string_intern_pool.DestroyStringReference(idStringId); + string_intern_pool.DestroyStringReferences(labelIndex, [](auto l) { return l.first; }); +} + +EvaluableNodeReference Entity::GetValueAtLabel(StringInternPool::StringID label_sid, EvaluableNodeManager *destination_temp_enm, bool direct_get, bool on_self, bool batch_call) +{ + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + return EvaluableNodeReference::Null(); + + if(!on_self && IsLabelPrivate(label_sid)) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + //don't lock if batch_call is set + Concurrency::ReadLock lock(mutex, std::defer_lock); + if(!batch_call) + lock.lock(); +#endif + + const auto &label = labelIndex.find(label_sid); + + if(label == end(labelIndex)) + return EvaluableNodeReference::Null(); + + if(label->second == nullptr) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference retval(label->second, false); + + //if didn't give a valid destination, just return what we have + if(destination_temp_enm == nullptr) + return retval; + + return destination_temp_enm->DeepAllocCopy(retval, direct_get ? 
EvaluableNodeManager::ENMM_NO_CHANGE : EvaluableNodeManager::ENMM_REMOVE_ALL); +} + +bool Entity::GetValueAtLabelAsNumber(StringInternPool::StringID label_sid, double &value_out, bool on_self) +{ + constexpr double value_if_not_found = std::numeric_limits::quiet_NaN(); + + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + { + value_out = value_if_not_found; + return false; + } + + if(!on_self && IsLabelPrivate(label_sid)) + { + value_out = value_if_not_found; + return false; + } + + const auto &label = labelIndex.find(label_sid); + if(label == end(labelIndex)) + { + value_out = value_if_not_found; + return false; + } + + value_out = EvaluableNode::ToNumber(label->second); + return true; +} + +bool Entity::GetValueAtLabelAsStringId(StringInternPool::StringID label_sid, StringInternPool::StringID &value_out, bool on_self) +{ + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + { + value_out = StringInternPool::NOT_A_STRING_ID; + return false; + } + + if(!on_self && IsLabelPrivate(label_sid)) + { + value_out = StringInternPool::NOT_A_STRING_ID; + return false; + } + + const auto &label = labelIndex.find(label_sid); + if(label == end(labelIndex)) + { + value_out = StringInternPool::NOT_A_STRING_ID; + return false; + } + + value_out = EvaluableNode::ToStringIDIfExists(label->second); + return true; +} + +bool Entity::GetValueAtLabelAsString(StringInternPool::StringID label_sid, std::string &value_out, bool on_self) +{ + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + { + value_out = ""; + return false; + } + + if(!on_self && IsLabelPrivate(label_sid)) + { + value_out = ""; + return false; + } + + const auto &label = labelIndex.find(label_sid); + if(label == end(labelIndex)) + { + value_out = ""; + return false; + } + + value_out = Parser::Unparse(label->second, &evaluableNodeManager, false, false); + return true; +} + +EvaluableNodeImmediateValueType Entity::GetValueAtLabelAsImmediateValue(StringInternPool::StringID label_sid, + EvaluableNodeImmediateValue &value_out, bool on_self) +{ + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + { + value_out.number = std::numeric_limits::quiet_NaN(); + return ENIVT_NOT_EXIST; + } + + if(!on_self && IsLabelPrivate(label_sid)) + { + value_out.number = std::numeric_limits::quiet_NaN(); + return ENIVT_NOT_EXIST; + } + + const auto &label = labelIndex.find(label_sid); + if(label == end(labelIndex)) + { + value_out.number = std::numeric_limits::quiet_NaN(); + return ENIVT_NOT_EXIST; + } + + return value_out.CopyValueFromEvaluableNode(label->second); +} + +bool Entity::SetValueAtLabel(StringInternPool::StringID label_sid, EvaluableNodeReference &new_value, bool direct_set, + std::vector *write_listeners, bool on_self, bool batch_call) +{ + if(label_sid <= StringInternPool::EMPTY_STRING_ID) + return false; + + if(!on_self) + { + if(IsLabelPrivate(label_sid)) + return EvaluableNodeReference(nullptr, true); + + //since it's not setting on self, another entity owns the data so it isn't unique to this entity + new_value.unique = false; + } + + auto current_node = labelIndex.find(label_sid); + + //if the label is not in the system, then can't do anything + if(current_node == end(labelIndex)) + return false; + + EvaluableNode *destination = current_node->second; + + //can't replace if the label points to null - shouldn't happen + if(destination == nullptr) + return false; + + if(!direct_set) + { + if(new_value == nullptr || new_value->GetNumChildNodes() == 0) + { + //if simple copy value, then just do it + destination->CopyValueFrom(new_value); + } + 
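// --- Illustrative sketch, not part of this diff ---
// The typed getters above (GetValueAtLabelAsNumber/AsStringId/AsString) all follow the same
// convention: on a missing or inaccessible label they write a sentinel (NaN, NOT_A_STRING_ID,
// or "") and return false instead of throwing. A minimal standalone analogue of that pattern
// over a plain map, with hypothetical names:
#include <limits>
#include <string>
#include <unordered_map>

static bool LookupAsNumber(const std::unordered_map<std::string, double> &labels,
	const std::string &label, double &value_out)
{
	auto found = labels.find(label);
	if(found == end(labels))
	{
		//missing label: hand back NaN and report failure, as GetValueAtLabelAsNumber does
		value_out = std::numeric_limits<double>::quiet_NaN();
		return false;
	}

	value_out = found->second;
	return true;
}
// --- end sketch ---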
else //need to copy child nodes + { + //remove all labels and allocate if needed + if(new_value.unique) + EvaluableNodeManager::ModifyLabelsForNodeTree(new_value, EvaluableNodeManager::ENMM_REMOVE_ALL); + else + new_value = evaluableNodeManager.DeepAllocCopy(new_value, EvaluableNodeManager::ENMM_REMOVE_ALL); + + //copy over the existing node, but don't update labels, etc. + destination->CopyValueFrom(new_value); + } + } + else //direct set + { + //allocate and remove any extra label indirections + //if replacement is null, create a new null node because will want to retain the fact that an addressable + // node exists in case it is reused in multiple places + if(new_value != nullptr) + { + if(new_value.unique) + EvaluableNodeManager::ModifyLabelsForNodeTree(new_value, EvaluableNodeManager::ENMM_LABEL_ESCAPE_DECREMENT); + else + new_value = evaluableNodeManager.DeepAllocCopy(new_value, EvaluableNodeManager::ENMM_LABEL_ESCAPE_DECREMENT); + } + else + { + new_value = EvaluableNodeReference(evaluableNodeManager.AllocNode(ENT_NULL), true); + } + //the value is being used in the entity, so no longer unique if it was before + new_value.unique = false; + + //update the index + labelIndex[label_sid] = new_value; + + //need to replace label in case there are any collapses of labels if multiple labels set + EvaluableNode *root = evaluableNodeManager.GetRootNode(); + + EvaluableNodeTreeManipulation::ReplaceLabelInTree(root, label_sid, new_value); + evaluableNodeManager.SetRootNode(root); + + if(!batch_call) + RebuildLabelIndex(); + } + + if(!batch_call) + { + Entity *container = GetContainer(); + if(direct_set) + EntityQueryManager::UpdateAllEntityLabels(container, this, GetEntityIndexOfContainer()); + else + EntityQueryManager::UpdateEntityLabel(container, this, GetEntityIndexOfContainer(), label_sid); + + asset_manager.UpdateEntity(this); + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogWriteValueToEntity(this, new_value, label_sid, direct_set); + } + } + + return true; +} + +//like SetValuesAtLabels, except accumulates each value at each label instead +std::pair Entity::SetValuesAtLabels(EvaluableNodeReference &new_label_values, bool accum_values, bool direct_set, + std::vector *write_listeners, size_t *num_new_nodes_allocated, bool on_self, bool copy_entity) +{ + //can only work with assoc arrays + if(!EvaluableNode::IsAssociativeArray(new_label_values)) + return std::make_pair(false, false); + + //if it's not setting on self, another entity owns the data so it isn't unique to this entity + if(!on_self) + new_label_values.unique = false; + + if(copy_entity) + SetRoot(GetRoot(), false); + + //if relevant, keep track of new memory allocated to the entity + size_t prev_size = 0; + if(num_new_nodes_allocated != nullptr) + prev_size = GetDeepSizeInNodes(); + + //make assignments + bool any_successful_assignment = false; + bool all_successful_assignments = true; + auto &new_label_values_mcn = new_label_values->GetMappedChildNodesReference(); + for(auto &[assignment_id, assignment] : new_label_values_mcn) + { + StringInternPool::StringID variable_sid = assignment_id; + EvaluableNodeReference variable_value_node(assignment, new_label_values.unique); + + if(accum_values) + { + //if copy_entity is set, then can treat variable_value_node as unique because it is working on an isolated copy + EvaluableNodeReference value_destination_node(GetValueAtLabel(variable_sid, nullptr, true, true, true), copy_entity); + //can't assign to a label if it doesn't exist + 
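// --- Illustrative usage sketch, not part of this diff ---
// How a caller would exercise the two SetValueAtLabel modes implemented above, assuming the
// declarations in Entity.h later in this diff; names such as label_sid, new_value, and
// write_listeners are hypothetical:
//
//   //non-direct set: copies the value into the node the label already points at
//   entity->SetValueAtLabel(label_sid, new_value, false, &write_listeners);
//
//   //direct set: replaces the addressed node itself, then rebuilds the label index
//   //so that any label collapses are picked up
//   entity->SetValueAtLabel(label_sid, new_value, true, &write_listeners);
// --- end sketch ---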
if(value_destination_node == nullptr) + continue; + + variable_value_node = AccumulateEvaluableNodeIntoEvaluableNode(value_destination_node, variable_value_node, &evaluableNodeManager); + } + + if(SetValueAtLabel(variable_sid, variable_value_node, direct_set, write_listeners, on_self, true)) + any_successful_assignment = true; + else + all_successful_assignments = false; + } + + if(any_successful_assignment) + { + if(direct_set) + { + //direct assigments need a rebuild of the index just in case a label collision occurs + RebuildLabelIndex(); + EntityQueryManager::UpdateAllEntityLabels(GetContainer(), this, GetEntityIndexOfContainer()); + } + else + { + EntityQueryManager::UpdateEntityLabels(GetContainer(), this, GetEntityIndexOfContainer(), new_label_values_mcn); + } + + asset_manager.UpdateEntity(this); + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogWriteValuesToEntity(this, new_label_values, direct_set); + } + + if(num_new_nodes_allocated != nullptr) + { + size_t cur_size = GetDeepSizeInNodes(); + //don't get credit for freeing memory, but do count toward memory consumed + if(cur_size > prev_size) + *num_new_nodes_allocated = cur_size - prev_size; + } + } + + return std::make_pair(any_successful_assignment, all_successful_assignments); +} + +EvaluableNodeReference Entity::Execute(ExecutionCycleCount max_num_steps, ExecutionCycleCount &num_steps_executed, + size_t max_num_nodes, size_t &num_nodes_allocated, + std::vector *write_listeners, PrintListener *print_listener, + EvaluableNode *call_stack, bool on_self, EvaluableNodeManager *destination_temp_enm, +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock *locked_memory_modification_lock, +#endif + StringInternPool::StringID label_sid, Interpreter *calling_interpreter) +{ + if(!on_self && IsLabelPrivate(label_sid)) + return EvaluableNodeReference(nullptr, true); + +#ifdef MULTITHREAD_SUPPORT + if(locked_memory_modification_lock != nullptr) + locked_memory_modification_lock->unlock(); + + Concurrency::WriteLock write_lock(mutex); +#endif + + EvaluableNode *node_to_execute = nullptr; + if(label_sid <= StringInternPool::EMPTY_STRING_ID) //if not specified, then use root + node_to_execute = evaluableNodeManager.GetRootNode(); + else //get code at label + { + const auto &label = labelIndex.find(label_sid); + + if(label != end(labelIndex)) + node_to_execute = label->second; + } + + //if label not found or no code, can't do anything + if(node_to_execute == nullptr) + { + #ifdef MULTITHREAD_SUPPORT + //put lock back in place + if(locked_memory_modification_lock != nullptr) + locked_memory_modification_lock->lock(); + #endif + return EvaluableNodeReference::Null(); + } + + size_t a_priori_entity_storage = evaluableNodeManager.GetNumberOfUsedNodes(); + + Interpreter interpreter(&evaluableNodeManager, max_num_steps, max_num_nodes, randomStream.CreateOtherStreamViaRand(), + write_listeners, print_listener, this, calling_interpreter); + +#ifdef MULTITHREAD_SUPPORT + interpreter.memoryModificationLock = Concurrency::ReadLock(interpreter.evaluableNodeManager->memoryModificationMutex); + write_lock.unlock(); +#endif + + EvaluableNodeReference retval = interpreter.ExecuteNode(node_to_execute, call_stack); + num_steps_executed = interpreter.GetNumStepsExecuted(); + +#ifdef MULTITHREAD_SUPPORT + //make sure have lock before copy into destination_temp_enm + if(locked_memory_modification_lock != nullptr) + locked_memory_modification_lock->lock(); +#endif + //make a copy in the appropriate location if possible and necessary + 
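// --- Illustrative sketch, not part of this diff ---
// SetValuesAtLabels above returns a pair: "did any assignment succeed" and "did all
// assignments succeed". The same aggregation shown in isolation over a hypothetical list of
// per-label results:
#include <utility>
#include <vector>

static std::pair<bool, bool> AggregateAssignmentResults(const std::vector<bool> &results)
{
	bool any_successful = false;
	bool all_successful = true;

	for(bool success : results)
	{
		if(success)
			any_successful = true;
		else
			all_successful = false;
	}

	return std::make_pair(any_successful, all_successful);
}
// --- end sketch ---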
if(destination_temp_enm != nullptr) + { + //only need to make a copy if it's a different destination + if(destination_temp_enm != &evaluableNodeManager) + { + //make a copy and free the original + EvaluableNodeReference retval_copy = destination_temp_enm->DeepAllocCopy(retval); + evaluableNodeManager.FreeNodeTreeIfPossible(retval); + retval = retval_copy; + } + } + else //don't want anything back + { + evaluableNodeManager.FreeNodeTreeIfPossible(retval); + retval = EvaluableNodeReference::Null(); + } + + //find difference in entity size + size_t post_entity_storage = evaluableNodeManager.GetNumberOfUsedNodes() + interpreter.GetNumEntityNodesAllocated(); + if(a_priori_entity_storage > post_entity_storage) + num_nodes_allocated = 0; + else + num_nodes_allocated = post_entity_storage - a_priori_entity_storage; + +#ifdef MULTITHREAD_SUPPORT + interpreter.memoryModificationLock.unlock(); +#endif + + return retval; +} + +bool Entity::IsEntityCurrentlyBeingExecuted() +{ + if(hasContainedEntities) + { + for(auto ce : entityRelationships.relationships->containedEntities) + { + if(ce->IsEntityCurrentlyBeingExecuted()) + return true; + } + } + + return evaluableNodeManager.IsAnyNodeReferencedOtherThanRoot(); +} + +EvaluableNodeReference Entity::GetRoot(EvaluableNodeManager *destination_temp_enm, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier) +{ + EvaluableNode *root = evaluableNodeManager.GetRootNode(); + + if(destination_temp_enm == nullptr) + return EvaluableNodeReference(root, false); + + return destination_temp_enm->DeepAllocCopy(root, metadata_modifier); +} + +size_t Entity::GetDeepSizeInNodes() +{ + size_t total_size = GetSizeInNodes(); + + //count one more for being an entity + total_size += 1; + + //count one more if customly named + if(IsNamedEntity(GetId())) + total_size += 1; + + for(auto entity : GetContainedEntities()) + total_size += entity->GetDeepSizeInNodes(); + + return total_size; +} + +size_t Entity::GetEstimatedReservedDeepSizeInBytes() +{ + size_t total_size = evaluableNodeManager.GetEstimatedTotalReservedSizeInBytes(); + + for(auto entity : GetContainedEntities()) + total_size += entity->GetEstimatedReservedDeepSizeInBytes(); + + return total_size; +} + +size_t Entity::GetEstimatedUsedDeepSizeInBytes() +{ + size_t total_size = evaluableNodeManager.GetEstimatedTotalUsedSizeInBytes(); + + for(auto entity : GetContainedEntities()) + total_size += entity->GetEstimatedReservedDeepSizeInBytes(); + + return total_size; +} + +Entity::LabelsAssocType Entity::RebuildLabelIndex() +{ + auto [new_labels, renormalized] = EvaluableNodeTreeManipulation::RetrieveLabelIndexesFromTreeAndNormalize(evaluableNodeManager.GetRootNode()); + + //update references (create new ones before destroying old ones so they do not need to be recreated) + string_intern_pool.CreateStringReferences(new_labels, [](auto l) { return l.first; } ); + string_intern_pool.DestroyStringReferences(labelIndex, [](auto l) { return l.first; }); + + //let the destructor of new_labels deallocate the old labelIndex + std::swap(labelIndex, new_labels); + + if(renormalized) + new_labels.clear(); + + //new_labels now holds the previous labels + return new_labels; +} + +StringInternPool::StringID Entity::AddContainedEntity(Entity *t, StringInternPool::StringID id_sid, std::vector *write_listeners) +{ + if(t == nullptr) + return StringInternPool::NOT_A_STRING_ID; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(mutex); + Concurrency::WriteLock lock_t(t->mutex); +#endif + + EnsureHasContainedEntities(); 
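// --- Illustrative usage sketch, not part of this diff ---
// A caller driving Entity::Execute as finished above, mirroring how ExecuteEntity in
// EntityExternalInterface.cpp later in this diff invokes it; a budget of 0 means no limit on
// steps or allocated nodes, and the label name here is hypothetical:
//
//   ExecutionCycleCount num_steps_executed = 0;
//   size_t num_nodes_allocated = 0;
//   entity->Execute(0, num_steps_executed, 0, num_nodes_allocated,
//       &write_listeners, print_listener, nullptr, false, nullptr,
//   #ifdef MULTITHREAD_SUPPORT
//       nullptr,
//   #endif
//       "my_label");
// --- end sketch ---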
+ + auto &id_to_index_lookup = entityRelationships.relationships->containedEntityStringIdToIndex; + auto &contained_entities = entityRelationships.relationships->containedEntities; + + //the index that t will be inserted to + size_t t_index = contained_entities.size(); + + StringInternPool::StringID previous_t_sid = t->idStringId; + + //autoassign an ID if not specified + if(id_sid == StringInternPool::NOT_A_STRING_ID) + { + std::string new_id; + for(;;) + { + //add a _ in front to differentiate from numbers + new_id = "_" + EvaluableNode::NumberToString(static_cast(randomStream.RandUInt32())); + + t->idStringId = string_intern_pool.CreateStringReference(new_id); + + //if not currently in use, then use it and stop searching + if(id_to_index_lookup.insert(std::make_pair(t->idStringId, t_index)).second == true) + break; + + //couldn't add it, so must already be in use. Free and make another + string_intern_pool.DestroyStringReference(t->idStringId); + } + } + else + { + //attempt to insert, or return empty string if fail + if(id_to_index_lookup.insert(std::make_pair(id_sid, t_index)).second == false) + return StringInternPool::NOT_A_STRING_ID; + + t->idStringId = string_intern_pool.CreateStringReference(id_sid); + } + + //insert the entity pointer + contained_entities.push_back(t); + + //clear previous references if applicable + string_intern_pool.DestroyStringReference(previous_t_sid); + + t->SetEntityContainer(this); + +#ifdef MULTITHREAD_SUPPORT + //relock the contained entity for read-only + // do this while the this, the container, remains exclusively locked to prevent something else from editing it + lock_t.unlock(); + Concurrency::ReadLock read_lock_t(t->mutex); + ////done writing to the this entity, the container + lock.unlock(); +#endif + + EntityQueryManager::AddEntity(this, t, t_index); + + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogCreateEntity(t); + asset_manager.CreateEntity(t); + } + + return t->idStringId; +} + +StringInternPool::StringID Entity::AddContainedEntity(Entity *t, std::string id_string, std::vector *write_listeners) +{ + if(t == nullptr) + return StringInternPool::NOT_A_STRING_ID; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(mutex); + Concurrency::WriteLock lock_t(t->mutex); +#endif + + EnsureHasContainedEntities(); + + auto &id_to_index_lookup = entityRelationships.relationships->containedEntityStringIdToIndex; + auto &contained_entities = entityRelationships.relationships->containedEntities; + + //the index that t will be inserted to + size_t t_index = contained_entities.size(); + + StringInternPool::StringID previous_t_sid = t->idStringId; + + //autoassign an ID if not specified + if(id_string == "") + { + for(;;) + { + //add a _ in front to differentiate from numbers + id_string = "_" + EvaluableNode::NumberToString(static_cast(randomStream.RandUInt32())); + + t->idStringId = string_intern_pool.CreateStringReference(id_string); + + //if the string is currently in use, but not in this entity then use it and stop searching + if(id_to_index_lookup.insert(std::make_pair(t->idStringId, t_index)).second == true) + break; + + //couldn't add it, so must already be in use. 
Free and make another + string_intern_pool.DestroyStringReference(t->idStringId); + } + } + else + { + t->idStringId = string_intern_pool.CreateStringReference(id_string); + + //attempt to insert, or return empty string if fail + if(id_to_index_lookup.insert(std::make_pair(t->idStringId, t_index)).second == false) + { + string_intern_pool.DestroyStringReference(t->idStringId); + return StringInternPool::NOT_A_STRING_ID; + } + } + + //insert the entity pointer + contained_entities.push_back(t); + + //clear previous references if applicable + string_intern_pool.DestroyStringReference(previous_t_sid); + + t->SetEntityContainer(this); + +#ifdef MULTITHREAD_SUPPORT + //relock the contained entity for read-only + // do this while the this, the container, remains exclusively locked to prevent something else from editing it + lock_t.unlock(); + Concurrency::ReadLock read_lock_t(t->mutex); + //done writing to the this entity, the container + lock.unlock(); +#endif + + EntityQueryManager::AddEntity(this, t, t_index); + + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogCreateEntity(t); + asset_manager.CreateEntity(t); + } + + return t->idStringId; +} + + +void Entity::RemoveContainedEntity(StringInternPool::StringID id, std::vector *write_listeners) +{ + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock(mutex); +#endif + + if(!hasContainedEntities) + return; + + auto &id_to_index_lookup = entityRelationships.relationships->containedEntityStringIdToIndex; + auto &contained_entities = entityRelationships.relationships->containedEntities; + + //find the entity by id + const auto &id_to_index_it_to_remove = id_to_index_lookup.find(id); + if(id_to_index_it_to_remove == end(id_to_index_lookup)) + return; + + //get the index + size_t index_to_remove = id_to_index_it_to_remove->second; + size_t index_to_replace = contained_entities.size() - 1; + Entity *entity_to_remove = contained_entities[index_to_remove]; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock_t(entity_to_remove->mutex); +#endif + + //record the entity as being deleted + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogDestroyEntity(entity_to_remove); + + asset_manager.DestroyEntity(entity_to_remove); + } + + EntityQueryManager::RemoveEntity(this, entity_to_remove, index_to_remove, index_to_replace); + + entity_to_remove->SetEntityContainer(nullptr); + + //remove the lookup + id_to_index_lookup.erase(id_to_index_it_to_remove); + + //if there's at least one entity left, then move the last one into this removed slot + if(index_to_replace > 0) + { + //if not removing the last entity, then swap the last into the empty slot + if(index_to_remove != index_to_replace) + { + //update the last entity's index and move it into the location removed + id_to_index_lookup[contained_entities[index_to_replace]->GetIdStringId()] = index_to_remove; + + //swap last entity with this new one, remove from contained_entities + std::swap(contained_entities[index_to_remove], contained_entities[index_to_replace]); + } + + contained_entities.resize(index_to_replace); + } + else // removed the last entity, clean up + { + Entity *container = entityRelationships.relationships->container; + delete entityRelationships.relationships; + + entityRelationships.container = container; + hasContainedEntities = false; + } +} + +Entity *Entity::GetContainedEntity(StringInternPool::StringID id) +{ + if(!hasContainedEntities) + return nullptr; + + auto &id_to_index_lookup = 
entityRelationships.relationships->containedEntityStringIdToIndex; + const auto &it = id_to_index_lookup.find(id); + if(it == end(id_to_index_lookup)) + return nullptr; + + //look up the pointer by its index + return entityRelationships.relationships->containedEntities[it->second]; +} + +size_t Entity::GetContainedEntityIndex(StringInternPool::StringID id) +{ + if(!hasContainedEntities) + return std::numeric_limits::max(); + + auto &id_to_index_lookup = entityRelationships.relationships->containedEntityStringIdToIndex; + const auto &it = id_to_index_lookup.find(id); + if(it == end(id_to_index_lookup)) + return std::numeric_limits::max(); + + //return the index + return it->second; +} + +StringInternPool::StringID Entity::GetContainedEntityIdFromIndex(size_t entity_index) +{ + if(!hasContainedEntities) + return StringInternPool::NOT_A_STRING_ID; + + auto &contained_entities = entityRelationships.relationships->containedEntities; + if(entity_index >= contained_entities.size()) + return StringInternPool::NOT_A_STRING_ID; + + return contained_entities[entity_index]->GetIdStringId(); +} + +void Entity::SetRandomState(const std::string &new_state, bool deep_set_seed, std::vector *write_listeners) +{ + randomStream.SetState(new_state); + + if(write_listeners != nullptr) + { + for(auto &wl : *write_listeners) + wl->LogSetEntityRandomSeed(this, new_state, false); + + asset_manager.UpdateEntity(this); + } + + if(deep_set_seed) + { + for(auto entity : GetContainedEntities()) + entity->SetRandomState(randomStream.CreateOtherStreamStateViaString(entity->GetId()), true, write_listeners); + } +} + +void Entity::SetRandomStream(const RandomStream &new_stream, std::vector *write_listeners) +{ + randomStream = new_stream; + + if(write_listeners != nullptr) + { + if(write_listeners->size() > 0) + { + std::string new_state_string = randomStream.GetState(); + for(auto &wl : *write_listeners) + wl->LogSetEntityRandomSeed(this, new_state_string, false); + } + + asset_manager.UpdateEntity(this); + } +} + +std::string Entity::CreateOtherRandomStreamStateViaString(const std::string &seed_string) +{ + //TODO 10975: move this up/out a layer to what is handling the Entity +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock read_lock(mutex); +#endif + + return randomStream.CreateOtherStreamStateViaString(seed_string); +} + +RandomStream Entity::CreateOtherRandomStreamViaString(const std::string &seed_string) +{ + //TODO 10975: move this up/out a layer to what is handling the Entity +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock read_lock(mutex); +#endif + + return randomStream.CreateOtherStreamViaString(seed_string); +} + +RandomStream Entity::CreateOtherRandomStreamViaRand() +{ + //TODO 10975: move this up/out a layer to what is handling the Entity +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock(mutex); +#endif + + return randomStream.CreateOtherStreamViaRand(); +} + +void Entity::SetRoot(EvaluableNode *_code, bool allocated_with_entity_enm, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier, std::vector *write_listeners) +{ + EvaluableNode *previous_root = evaluableNodeManager.GetRootNode(); + + if(_code == nullptr) + { + evaluableNodeManager.SetRootNode(evaluableNodeManager.AllocNode(ENT_NULL)); + } + else if(allocated_with_entity_enm && metadata_modifier == EvaluableNodeManager::ENMM_NO_CHANGE) + { + evaluableNodeManager.SetRootNode(_code); + } + else + { + auto code_copy = evaluableNodeManager.DeepAllocCopy(_code, metadata_modifier); + 
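// --- Illustrative usage sketch, not part of this diff ---
// SetRandomState above, when deep_set_seed is true, derives each contained entity's seed from
// the container's stream and that entity's id, so one top-level seed reproduces the whole tree
// of streams. The seed string below is hypothetical:
//
//   entity->SetRandomState("my seed", true, &write_listeners);
//   //internally this recurses roughly as:
//   //  contained->SetRandomState(randomStream.CreateOtherStreamStateViaString(contained->GetId()),
//   //      true, write_listeners);
// --- end sketch ---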
evaluableNodeManager.SetRootNode(code_copy.reference); + } + + //keep reference for current root + evaluableNodeManager.KeepNodeReference(evaluableNodeManager.GetRootNode()); + + //free current root reference + evaluableNodeManager.FreeNodeReference(previous_root); + + RebuildLabelIndex(); + + EntityQueryManager::UpdateAllEntityLabels(GetContainer(), this, GetEntityIndexOfContainer()); + if(write_listeners != nullptr) + { + if(write_listeners->size() > 0) + { + std::string new_code_string = Parser::Unparse(evaluableNodeManager.GetRootNode(), &evaluableNodeManager); + + for(auto &wl : *write_listeners) + wl->LogWriteToEntity(this, new_code_string); + } + + asset_manager.UpdateEntity(this); + } +} + +void Entity::SetRoot(std::string &code_string, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier, std::vector *write_listeners) +{ + EvaluableNodeReference new_code = Parser::Parse(code_string, &evaluableNodeManager); + SetRoot(new_code.reference, true, metadata_modifier, write_listeners); +} + +void Entity::AccumRoot(EvaluableNode *accum_code, bool allocated_with_entity_enm, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier, std::vector *write_listeners) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock(mutex); +#endif + + if( !(allocated_with_entity_enm && metadata_modifier == EvaluableNodeManager::ENMM_NO_CHANGE)) + { + auto code_copy = evaluableNodeManager.DeepAllocCopy(accum_code, metadata_modifier); + accum_code = code_copy.reference; + } + + bool accum_has_labels = EvaluableNodeTreeManipulation::DoesTreeContainLabels(accum_code); + + EvaluableNode *previous_root = evaluableNodeManager.GetRootNode(); + EvaluableNodeReference new_root = AccumulateEvaluableNodeIntoEvaluableNode(EvaluableNodeReference(previous_root, true), EvaluableNodeReference(accum_code, true), &evaluableNodeManager); + + //need to check if still cycle free as it may no longer be + EvaluableNodeManager::UpdateFlagsForNodeTree(new_root); + + if(new_root != previous_root) + { + //keep reference for current root (mainly in case a new node was added if the entity were previously empty) + evaluableNodeManager.KeepNodeReference(new_root); + + evaluableNodeManager.SetRootNode(new_root); + + //free current root reference + evaluableNodeManager.FreeNodeReference(previous_root); + } + + size_t num_root_labels_to_update = 0; + if(new_root != nullptr) + num_root_labels_to_update = new_root->GetNumLabels(); + + if(accum_has_labels) + { + LabelsAssocType prev_labels = RebuildLabelIndex(); + + //if have all new labels or RebuildLabelIndex had to renormalize (in which case prev_labels will be empty) + // then update all labels just in case + if(prev_labels.size() == 0 && labelIndex.size() > 0) + { + EntityQueryManager::UpdateAllEntityLabels(GetContainer(), this, GetEntityIndexOfContainer()); + + //root labels have been updated + num_root_labels_to_update = 0; + } + else //clean rebuild + { + EntityQueryManager::UpdateEntityLabelsAddedOrChanged(GetContainer(), this, GetEntityIndexOfContainer(), + prev_labels, labelIndex); + } + } + + //if any root labels left to update, then update them + if(num_root_labels_to_update > 0) + { + //only need to update labels on root + for(size_t i = 0; i < num_root_labels_to_update; i++) + { + auto label_sid = new_root->GetLabelStringId(i); + EntityQueryManager::UpdateEntityLabel(GetContainer(), this, GetEntityIndexOfContainer(), label_sid); + } + } + + + if(write_listeners != nullptr) + { + if(write_listeners->size() > 0) + { + std::string 
new_code_string = Parser::Unparse(new_root, &evaluableNodeManager); + + for(auto &wl : *write_listeners) + wl->LogWriteToEntity(this, new_code_string); + } + + asset_manager.UpdateEntity(this); + } +} + +void Entity::GetAllDeeplyContainedEntitiesGroupedRecurse(std::vector &entities) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(mutex); +#endif + + if(!hasContainedEntities) + return; + + auto &contained_entities = GetContainedEntities(); + entities.insert(end(entities), begin(contained_entities), end(contained_entities)); + + //insert a nullptr at the end to indicate this group is complete + entities.emplace_back(nullptr); + + for(auto &ce : contained_entities) + ce->GetAllDeeplyContainedEntitiesGroupedRecurse(entities); +} diff --git a/src/Amalgam/entity/Entity.h b/src/Amalgam/entity/Entity.h new file mode 100644 index 00000000..129bc3b8 --- /dev/null +++ b/src/Amalgam/entity/Entity.h @@ -0,0 +1,839 @@ +#pragma once + +//project headers: +#include "Concurrency.h" +#include "HashMaps.h" +#include "Parser.h" +#include "RandomStream.h" + +//system headers: +#include +#include +#include +#include +#include + +//forward declarations: +class EntityWriteListener; +class EvaluableNode; +class EvaluableNodeManagement; +class Interpreter; +class PrintListener; + +//An Entity is a container of code/data consisting comprised of a graph of EvaluableNode. +// They can contain other entities, can be queried and serialized. +class Entity +{ +public: + + //type for looking up an entity based on a StringID + using EntityLookupAssocType = CompactHashMap; + + //StringID to index + using StringIdToIndexAssocType = CompactHashMap; + + //set of entities + using EntitySetType = FastHashSet; + + //StringID to EvaluableNode * + using LabelsAssocType = CompactHashMap; + + Entity(); + + //create Entity from existing code, rand_state is the current state of the random number generator, modifying labels as specified + Entity(Entity *_container, std::string &code_string, const std::string &rand_state, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE); + Entity(Entity *_container, EvaluableNode *_root, const std::string &rand_state, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE); + + //Creates a new Entity as a copy of the Entity passed in; everything is identical except for the time created and id + Entity(Entity *t); + + ~Entity(); + + //executes the entity for up to max_num_steps on the given label_name (if empty string, then evaluates root node) + // Returns the result from the execution, sets num_steps_executed to the number executed, sets num_nodes_allocated to the number of nodes allocated in entities + // Uses the EvaluableNodeManager destination_temp_enm for any values returned that are temporary + // Uses max_num_steps as the maximum number of operations that can be executed by this and any subordinate operations called. If max_num_steps is 0, then it will execute unlimeted steps + // Uses max_num_nodes as the maximum number of nodes that can be allocated in memory by this and any subordinate operations called. 
If max_num_nodes is 0, then it will allow unlimited allocations + // If on_self is true, then it will be allowed to access private variables + // If locked_memory_modification_lock is specified, then it will unlock it prior to the execution, but lock it again before + // potentially writing anything out to destination_temp_enm + EvaluableNodeReference Execute(ExecutionCycleCount max_num_steps, ExecutionCycleCount &num_steps_executed, size_t max_num_nodes, size_t &num_nodes_allocated, + std::vector *write_listeners, PrintListener *print_listener, + EvaluableNode *call_stack = nullptr, bool on_self = false, EvaluableNodeManager *destination_temp_enm = nullptr, + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock *locked_memory_modification_lock = nullptr, + #endif + StringInternPool::StringID label_sid = StringInternPool::NOT_A_STRING_ID, + Interpreter *calling_interpreter = nullptr); + + //same as Execute but accepts a string for label name + inline EvaluableNodeReference Execute(ExecutionCycleCount max_num_steps, ExecutionCycleCount &num_steps_executed, + size_t max_num_nodes, size_t &num_nodes_allocated, + std::vector *write_listeners, PrintListener *print_listener, + EvaluableNode *call_stack, bool on_self, EvaluableNodeManager *destination_temp_enm, + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock *locked_memory_modification_lock, + #endif + const std::string &label_name, + Interpreter *calling_interpreter = nullptr) + { + StringInternPool::StringID label_sid = string_intern_pool.GetIDFromString(label_name); + return Execute(max_num_steps, num_steps_executed, max_num_nodes, num_nodes_allocated, write_listeners, print_listener, + call_stack, on_self, destination_temp_enm, + #ifdef MULTITHREAD_SUPPORT + locked_memory_modification_lock, + #endif + label_sid, calling_interpreter); + } + + //returns true if the entity or any of its contained entities are currently being executed, either because of multiple threads executing on it + // or calls to contained entities back to the container etc., because certain operations (such as move and destroy) + // cannot be completed if this is the case + bool IsEntityCurrentlyBeingExecuted(); + + //Returns the code for the Entity in string form + inline std::string GetCodeAsString() + { + return Parser::Unparse(evaluableNodeManager.GetRootNode(), &evaluableNodeManager); + } + + //Returns the root of the entity + // if destination_temp_enm is specified, then it will perform a copy using that EvaluableNodeManager using metadata_modifier + EvaluableNodeReference GetRoot(EvaluableNodeManager *destination_temp_enm = nullptr, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE); + + //Returns the number of nodes in the entity + inline size_t GetSizeInNodes() + { + return EvaluableNode::GetDeepSize(evaluableNodeManager.GetRootNode()); + } + + //Returns the number of nodes in the entity and all contained entities + size_t GetDeepSizeInNodes(); + + //Returns the estimated size of all memory managers in this entity and all contained entities + // only an estimate because the platform's underlying memory management system may need to allocate additional + // memory that cannot be easily accounted for + size_t GetEstimatedReservedDeepSizeInBytes(); + size_t GetEstimatedUsedDeepSizeInBytes(); + + //Returns the EvaluableNode at the specified label_sid + // Returns nullptr if the label does not exist + // Uses the EvaluableNodeManager destination_temp_enm to make a deep copy of the value. 
+ // If destination_temp_enm is nullptr, it will return the node reference directly. + // If direct_get is true, then it will return values with all labels + // If on_self is true, then it will be allowed to access private variables + // If batch_call is true, then it assumes it will be called in a batch of updates and will not perform any cleanup or synchronization + EvaluableNodeReference GetValueAtLabel(StringInternPool::StringID label_sid, EvaluableNodeManager *destination_temp_enm, bool direct_get, + bool on_self = false, bool batch_call = false); + + //same as GetValueAtLabel but accepts a string for label_name + inline EvaluableNodeReference GetValueAtLabel(const std::string &label_name, EvaluableNodeManager *destination_temp_enm, bool direct_get, bool on_self = false) + { + StringInternPool::StringID label_sid = string_intern_pool.GetIDFromString(label_name); + return GetValueAtLabel(label_sid, destination_temp_enm, direct_get, on_self); + } + + //Returns true if the label specified by label_sid exists + bool DoesLabelExist(StringInternPool::StringID label_sid) + { + auto cur_value_it = labelIndex.find(label_sid); + return (cur_value_it != end(labelIndex)); + } + + //Evaluates the specified label into a number and puts the value in value_out. + //If the label exists, sets value_out to the value and returns true. + // Otherwise sets value_out to -NaN and returns false + bool GetValueAtLabelAsNumber(StringInternPool::StringID label_sid, double &value_out, bool on_self = false); + + //Evaluates the specified label into a string and puts the value in value_out. + //If the label exists, sets value_out to the value and returns true. + // Otherwise sets value_out to empty string and returns false + bool GetValueAtLabelAsStringId(StringInternPool::StringID label_sid, StringInternPool::StringID &value_out, bool on_self = false); + + //Evaluates the specified label into a string and puts the value in value_out. + //If the label exists, sets value_out to the value and returns true. + // Otherwise sets value_out to empty string and returns false + bool GetValueAtLabelAsString(StringInternPool::StringID label_sid, std::string &value_out, bool on_self = false); + + //Evaluates the specified label into a EvaluableNodeImmediateValue and puts the value in value_out returns the ValueType + EvaluableNodeImmediateValueType GetValueAtLabelAsImmediateValue(StringInternPool::StringID label_sid, + EvaluableNodeImmediateValue &value_out, bool on_self = false); + + //Iterates over all of the labels, calling GetValueAtLabel for each, and passing the label sid and the node to the user specified function func + template + inline void IterateFunctionOverLabels(LabelFunc func, + EvaluableNodeManager *destination_temp_enm = nullptr, bool direct_get = false, bool on_self = false) + { + for(auto &[label_id, _] : labelIndex) + { + EvaluableNode *node = GetValueAtLabel(label_id, destination_temp_enm, direct_get, on_self, true); + if(node != nullptr) + func(label_id, node); + } + } + + //Sets the node at label_name to new_value. 
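// --- Illustrative usage sketch, not part of this diff ---
// Iterating each visible label via IterateFunctionOverLabels declared above; the lambda
// receives the interned label id and its node (printing assumes <iostream> is available):
//
//   entity->IterateFunctionOverLabels(
//       [](StringInternPool::StringID label_sid, EvaluableNode *node)
//       {
//           std::cout << string_intern_pool.GetStringFromID(label_sid) << "\n";
//       });
// --- end sketch ---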
+ // If new_value is unique (EvaluableNodeReference property) and on_self is true, then it will take ownership of new_value + //Retains true if the value (or modification thereof) was able to be set, false if the label does not exist or it fails for other reasons + // If direct_get is true, then it will return values with all labels + // If on_self is true, then it will be allowed to access private variables + // If batch_call is true, then it assumes it will be called in a batch of updates and will not perform any cleanup + //note that this cannot be called concurrently on the same entity + bool SetValueAtLabel(StringInternPool::StringID label_sid, EvaluableNodeReference &new_value, bool direct_set, + std::vector *write_listeners, bool on_self = false, bool batch_call = false); + + //For each label-value pair in an associative array new_label_values, attempts to set the value at the label + // If new_value is unique (EvaluableNodeReference property) and on_self is true, then it will take ownership of new_value + // returns a pair of values; the first is true if any assignment was successful, the second is only true if all assignments were successful + // if accum_values is true, then it will accumulate the values to the labels rather than setting them + // if num_new_nodes_allocated is not null, then it will be set to the total amount of new memory taken up by the entity at the end of the call + // other parameters match those of SetValueAtLabel, and will call SetValueAtLabel with batch_call = true + // if copy_entity is true, then it will make a full copy of the entity before setting the labels in a copy-on-write fashion (for concurrent access) + std::pair SetValuesAtLabels(EvaluableNodeReference &new_label_values, bool accum_values, bool direct_set, + std::vector *write_listeners, size_t *num_new_nodes_allocated, bool on_self, bool copy_entity); + + //Rebuilds label index for retrieval + // returns the previous label index prior to rebuild; if the label index had to be rebuilt from scratch + // due to a label collision, then the previous label index will be empty + LabelsAssocType RebuildLabelIndex(); + + //Returns the id for this Entity + inline const std::string &GetId() + { + return string_intern_pool.GetStringFromID(GetIdStringId()); + } + + //Returns the Id String's StringID (the index pointing to the Entity's ID string) + constexpr StringInternPool::StringID GetIdStringId() + { + return idStringId; + } + + //Adds t to be contained by this Entity + // if _id is empty, then it will automatically generate an _id + //returns the id used, empty string on failure + /// write_listeners is optional, and if specified, will log the event + StringInternPool::StringID AddContainedEntity(Entity *t, StringInternPool::StringID id_sid, std::vector *write_listeners = nullptr); + + StringInternPool::StringID AddContainedEntity(Entity *t, std::string id_string, std::vector *write_listeners = nullptr); + + inline void AddContainedEntityViaReference(Entity *t, StringInternRef &sir, std::vector *write_listeners = nullptr) + { + StringInternPool::StringID new_sid = AddContainedEntity(t, static_cast(sir), write_listeners); + sir.SetIDAndCreateReference(new_sid); + } + + inline void AddContainedEntityViaReference(Entity *t, StringInternWeakRef &siwr, std::vector *write_listeners = nullptr) + { + StringInternPool::StringID new_sid = AddContainedEntity(t, static_cast(siwr), write_listeners); + siwr.SetID(new_sid); + } + + //Removes the specified id from being contained by this Entity + /// write_listeners 
is optional, and if specified, will log the event + void RemoveContainedEntity(StringInternPool::StringID id, std::vector *write_listeners = nullptr); + + //returns the ID for a Entity that is contained by this Entity, null if it does not exist + Entity *GetContainedEntity(StringInternPool::StringID id); + + //returns the entity index for the given id + // if not found, will return std::numeric_limits::max() + size_t GetContainedEntityIndex(StringInternPool::StringID id); + + //looks up the contained entity's string id based on its index in contained entities list + StringInternPool::StringID GetContainedEntityIdFromIndex(size_t entity_index); + + //returns true if this entity has one or more contained entities + constexpr bool HasContainedEntities() + { + return hasContainedEntities; + } + + //returns the number of contained entities + inline size_t GetNumContainedEntities() + { + if(hasContainedEntities) + return entityRelationships.relationships->containedEntities.size(); + else + return 0; + } + + //Returns direct access to vector of pointers to Entity objects contained by this Entity + inline std::vector &GetContainedEntities() + { + if(hasContainedEntities) + return entityRelationships.relationships->containedEntities; + else + return emptyContainedEntities; + } + + //Returns the containing entity + inline Entity *GetContainer() + { + if(hasContainedEntities) + return entityRelationships.relationships->container; + else + return entityRelationships.container; + } + + //returns the index of the entity as listed by its container + // returns 0 if it has no container + inline size_t GetEntityIndexOfContainer() + { + Entity *container = GetContainer(); + if(container == nullptr) + return 0; + + auto index_it = container->entityRelationships.relationships->containedEntityStringIdToIndex.find(idStringId); + return index_it->second; + } + + //returns true if this Entity contains e within its own contained entities or any sub entity contains it + bool DoesDeepContainEntity(Entity *e) + { + //climb back up and see if any container matches this + while(e != nullptr) + { + Entity *e_container = e->GetContainer(); + if(e_container == this) + return true; + + e = e_container; + } + return true; + } + + //returns a list of all entities contained, all entities they contain, etc. 
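// --- Illustrative usage sketch, not part of this diff ---
// Consuming the grouped traversal declared just below, where a nullptr entry closes one
// container's group of contained entities:
//
//   for(Entity *e : entity->GetAllDeeplyContainedEntitiesGrouped())
//   {
//       if(e == nullptr)
//           continue; //end of one container's group
//       //...process e...
//   }
// --- end sketch ---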
+ //the returned vector will include a nullptr after each group of entities that are all contained + // by the same entity + inline std::vector GetAllDeeplyContainedEntitiesGrouped() + { + std::vector entities; + GetAllDeeplyContainedEntitiesGroupedRecurse(entities); + return entities; + } + + //gets the current state of the random stream in string form + inline std::string GetRandomState() + { + return randomStream.GetState(); + } + + //gets the current random stream in RandomStream form + inline RandomStream GetRandomStream() + { + return randomStream; + } + + //sets (seeds) the current state of the random stream based on string + // if deep_set_seed is true, it will recursively set all contained entities with appropriate seeds + // write_listeners is optional, and if specified, will log the event + void SetRandomState(const std::string &new_state, bool deep_set_seed, std::vector *write_listeners = nullptr); + + //sets (seeds) the current state of the random stream based on RandomStream + // write_listeners is optional, and if specified, will log the event + void SetRandomStream(const RandomStream &new_stream, std::vector *write_listeners = nullptr); + + //returns a random seed based on this stream's current state and seed_string parameter + std::string CreateOtherRandomStreamStateViaString(const std::string &seed_string); + + //returns a Randomstream based on this stream's current state and seed_string parameter + RandomStream CreateOtherRandomStreamViaString(const std::string &seed_string); + + //consumes random numbers from the stream to create a new RandomStream + RandomStream CreateOtherRandomStreamViaRand(); + + //Returns true if the Entity is a named entity, that is, its ID is not autogenerated + // An identity is considered named if the string represents anything other than an integer + inline static bool IsNamedEntity(const std::string &id) + { + auto position_non_integer_underscore = id.find_first_not_of("_0123456789"); + return position_non_integer_underscore != std::string::npos; + } + + inline static bool IsNamedEntity(StringInternPool::StringID id) + { + const std::string &id_name = string_intern_pool.GetStringFromID(id); + if(id_name == StringInternPool::EMPTY_STRING) + return false; + return IsNamedEntity(id_name); + } + + //Sets the code and recreates the index, modifying labels as specified + // if allocated_with_entity_enm is false, then it will copy the tree into the entity's EvaluableNodeManager, otherwise it will just assume it is already available + // write_listeners is optional, and if specified, will log the event + void SetRoot(EvaluableNode *_code, bool allocated_with_entity_enm, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE, std::vector *write_listeners = nullptr); + void SetRoot(std::string &code_string, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE, std::vector *write_listeners = nullptr); + + //accumulates the code and recreates the index, modifying labels as specified + // if allocated_with_entity_enm is false, then it will copy the tree into the entity's EvaluableNodeManager, otherwise it will just assume it is already available + // write_listeners is optional, and if specified, will log the event + void AccumRoot(EvaluableNode *_code, bool allocated_with_entity_enm, EvaluableNodeManager::EvaluableNodeMetadataModifier metadata_modifier = EvaluableNodeManager::ENMM_NO_CHANGE, std::vector *write_listeners = nullptr); + + //collects 
garbage on evaluableNodeManager +#ifdef MULTITHREAD_SUPPORT + //if multithreaded, then memory_modification_lock is the lock used for memoryModificationMutex + __forceinline void CollectGarbage(Concurrency::ReadLock *memory_modification_lock) + { + evaluableNodeManager.CollectGarbage(memory_modification_lock); + } +#else + __forceinline void CollectGarbage() + { + evaluableNodeManager.CollectGarbage(); + } +#endif + + //returns true if the label can be queried upon + static inline bool IsLabelValidAndPublic(StringInternPool::StringID label_sid) + { + if(label_sid == string_intern_pool.NOT_A_STRING_ID) + return false; + + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + return IsLabelValidAndPublic(label_name); + } + + //same as the same-named function with StringInternPool::StringID but with actual string + static inline bool IsLabelValidAndPublic(const std::string &label_name) + { + //allow size zero label + if(label_name.size() == 0) + return true; + //commented out label + if(label_name[0] == '#') + return false; + return !IsLabelPrivate(label_name); + } + + //returns true if the label is only accessible to itself (starts with !) + static inline bool IsLabelPrivate(StringInternPool::StringID label_sid) + { + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + return IsLabelPrivate(label_name); + } + + //same as the same-named function with StringInternPool::StringID but with actual string + static inline bool IsLabelPrivate(const std::string &label_name) + { + if(label_name.size() == 0) + return false; + if(label_name[0] == '!') + return true; + return false; + } + + //returns true if the label is accessible to contained entities (starts with ^) + static inline bool IsLabelAccessibleToContainedEntities(StringInternPool::StringID label_sid) + { + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + return IsLabelAccessibleToContainedEntities(label_name); + } + + //same as the same-named function with StringInternPool::StringID but with actual string + static inline bool IsLabelAccessibleToContainedEntities(const std::string &label_name) + { + if(label_name.size() == 0) + return false; + if(label_name[0] == '^') + return true; + return false; + } + +#ifdef MULTITHREAD_SUPPORT + + //TODO 10975: + // * Remove most locks from Entity itself into Interpreter, etc. + // * Make sure there is a lock so the Entity can't be deleted with interpreters running + // * Apply the locking mechanisms below to all appropriate entity operations and use the appropriate Entity*Reference + + //returns true if Entity a should be locked before b + static inline bool ShouldLockEntityABeforeB(Entity *a, Entity *b) + { + if(a == nullptr || b == nullptr) + return true; + return reinterpret_cast(a) < reinterpret_cast(b); + } + + //Returns an appropriate lock object for operations on this Entity + //Note that it will only lock the Entity's immediate attributes, not contained entities, code, etc. 
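// --- Illustrative sketch, not part of this diff ---
// Label visibility as implemented by the predicates above, shown on hypothetical label names:
//
//   Entity::IsLabelPrivate("!secret");                        //true  - readable only by the entity itself
//   Entity::IsLabelAccessibleToContainedEntities("^shared");  //true  - visible to contained entities
//   Entity::IsLabelValidAndPublic("#commented");              //false - commented-out label
//   Entity::IsLabelValidAndPublic("score");                   //true  - public and queryable
// --- end sketch ---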
+ template + inline LockType CreateEntityLock() + { + return LockType(mutex); + } + + //Returns a vector of read locks for the whole entity and all contained Entities recursively + template + inline Concurrency::MultipleLockBufferObject CreateDeepEntityReadLocks(std::vector &lock_buffer) + { + CreateDeepEntityLocksRecurse(lock_buffer); + Concurrency::MultipleLockBufferObject mbo(lock_buffer); + return mbo; + } + + //recurively grabs read locks on the whole entity and everything contained + template + void CreateDeepEntityLocksRecurse(std::vector &lock_buffer) + { + //lock this one + lock_buffer.emplace_back(CreateEntityLock()); + + //early out if done + if(!HasContainedEntities()) + return; + + auto &contained_entities = GetContainedEntities(); + + //need to store more + size_t num_contained = contained_entities.size(); + lock_buffer.reserve(lock_buffer.size() + num_contained); + + //collect and sort contained entities by address + std::vector contained_sorted; + contained_sorted.reserve(num_contained); + + for(auto ce : contained_entities) + contained_sorted.push_back(ce); + + std::sort(begin(contained_sorted), end(contained_sorted), + [](Entity *a, Entity *b) + { + return ShouldLockEntityABeforeB(a, b); + } + ); + + //lock all contained entities before proceeding further + for(auto e : contained_sorted) + lock_buffer.emplace_back(e->CreateEntityLock()); + + for(auto e : contained_sorted) + e->CreateDeepEntityLocksRecurse(lock_buffer); + } + + //locks two entities + // locks will be released when the object is destructed + // makes sure there aren't deadlock conditions (circular wait) by consistently locking them in order of memory address + template + class TwoEntityLock + { + public: + TwoEntityLock(Entity *a, Entity *b) + { + //if equal, but not null, just lock one + if(a == b && a != nullptr) + { + entityLockA = a->CreateEntityLock(); + return; + } + + if(ShouldLockEntityABeforeB(a, b)) + { + if(a != nullptr) + entityLockA = a->CreateEntityLock(); + if(b != nullptr) + entityLockB = b->CreateEntityLock(); + } + else + { + if(b != nullptr) + entityLockB = b->CreateEntityLock(); + if(a != nullptr) + entityLockA = a->CreateEntityLock(); + } + } + + protected: + LockType entityLockA; + LockType entityLockB; + }; + + //locks three entities + // locks will be released when the object is destructed + // makes sure there aren't deadlock conditions (circular wait) by consistently locking them in order of memory address + template + class ThreeEntityLock + { + public: + ThreeEntityLock(Entity *a, Entity *b, Entity *c) + { + //if equal, but not null, just lock one + if(a == b && b == c && a != nullptr) + { + entityLockA = a->CreateEntityLock(); + return; + } + + //if two are the same, then just lock two + if(a == b) + { + if(a != nullptr) + entityLockA = a->CreateEntityLock(); + if(c != nullptr) + entityLockC = c->CreateEntityLock(); + return; + } + + if(a == c || b == c) + { + if(a != nullptr) + entityLockA = a->CreateEntityLock(); + if(b != nullptr) + entityLockB = b->CreateEntityLock(); + return; + } + + //check if any contain the other, in which case, null out any contained + if(a != nullptr && a->DoesDeepContainEntity(b)) + b = nullptr; + if(b != nullptr && b->DoesDeepContainEntity(a)) + a = nullptr; + + if(a != nullptr && a->DoesDeepContainEntity(c)) + c = nullptr; + if(c != nullptr && c->DoesDeepContainEntity(a)) + a = nullptr; + + if(c != nullptr && c->DoesDeepContainEntity(b)) + b = nullptr; + if(b != nullptr && b->DoesDeepContainEntity(c)) + c = nullptr; + + //sort in order of a, b, c + 
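// --- Illustrative sketch, not part of this diff ---
// The lock-ordering rule used by ShouldLockEntityABeforeB and the deep-lock helpers above,
// shown standalone with std::mutex: always acquiring the lower address first gives every
// thread the same global order, which removes the circular wait needed for deadlock.
// Node and LockBothInAddressOrder are hypothetical names; the caller unlocks afterwards.
#include <cstdint>
#include <mutex>
#include <utility>

struct Node { std::mutex m; };

static void LockBothInAddressOrder(Node &a, Node &b)
{
	//same object passed twice: lock it once
	if(&a == &b)
	{
		a.m.lock();
		return;
	}

	//order the pair by address, mirroring ShouldLockEntityABeforeB
	Node *first = &a;
	Node *second = &b;
	if(reinterpret_cast<std::uintptr_t>(second) < reinterpret_cast<std::uintptr_t>(first))
		std::swap(first, second);

	first->m.lock();
	second->m.lock();
}
// --- end sketch ---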
if(ShouldLockEntityABeforeB(c, a)) + std::swap(a, c); + if(ShouldLockEntityABeforeB(b, a)) + std::swap(b, c); + if(ShouldLockEntityABeforeB(c, b)) + std::swap(c, b); + + //need to lock all three in order + if(a != nullptr) + entityLockA = a->CreateEntityLock(); + if(b != nullptr) + entityLockB = b->CreateEntityLock(); + if(c != nullptr) + entityLockC = c->CreateEntityLock(); + } + + protected: + LockType entityLockA; + LockType entityLockB; + LockType entityLockC; + }; + + //Returns a vector of read locks for the whole entity and all contained Entities recursively + template + static inline Concurrency::MultipleLockBufferObject CreateDeepTwoEntityDeepLocks(Entity *a, Entity *b, std::vector &lock_buffer) + { + CreateDeepTwoEntityDeepLocksRecurse(a, b, lock_buffer); + Concurrency::MultipleLockBufferObject mbo(lock_buffer); + return mbo; + } + + template + static inline void CreateDeepTwoEntityDeepLocksRecurse(Entity *a, Entity *b, std::vector &lock_buffer) + { + //handle the cases where one of the entities is nullptr by locking the one that isn't + if(a == nullptr) + { + if(b == nullptr) + return; + + b->CreateDeepEntityLocksRecurse(lock_buffer); + return; + } + + if(b == nullptr) + { + a->CreateDeepEntityLocksRecurse(lock_buffer); + return; + } + + //both a and b are valid + + //if one contains the other, just lock the outer one + if(a->DoesDeepContainEntity(b)) + { + a->CreateDeepEntityLocksRecurse(lock_buffer); + return; + } + if(b->DoesDeepContainEntity(a)) + { + b->CreateDeepEntityLocksRecurse(lock_buffer); + return; + } + + //determine which to lock first + if(ShouldLockEntityABeforeB(a, b)) + { + a->CreateDeepEntityLocksRecurse(lock_buffer); + b->CreateDeepEntityLocksRecurse(lock_buffer); + } + else + { + b->CreateDeepEntityLocksRecurse(lock_buffer); + a->CreateDeepEntityLocksRecurse(lock_buffer); + } + } + +#endif + + //nodes used for storing the entity and for all interpreters for this entity + //the 0th node is implicitly the root node of the entity + EvaluableNodeManager evaluableNodeManager; + +protected: + + //helper function for GetAllDeeplyContainedEntitiesGrouped + void GetAllDeeplyContainedEntitiesGroupedRecurse(std::vector &entities); + + //ensures the data structures will exist for containing entities if they don't already + inline void EnsureHasContainedEntities() + { + if(!hasContainedEntities) + { + Entity *container = entityRelationships.container; + entityRelationships.relationships = new EntityRelationships; + + entityRelationships.relationships->container = container; + hasContainedEntities = true; + } + } + + //sets or overwrites the current container of this entity + inline void SetEntityContainer(Entity *container) + { + if(hasContainedEntities) + entityRelationships.relationships->container = container; + else + entityRelationships.container = container; + } + + //when an entity has contained entities, then it needs to store the container and the contained entities + struct EntityRelationships + { + //Entities contained by this Entity + std::vector containedEntities; + + //lookup from StringInternPool::StringID to the index in containedEntities corresponding to that entity + // Note that even though these are are references to StringInternPool::StringID, they are not counted as references + // because the entities are keeping track; if an entity exists, then its ID will still be a valid string reference + StringIdToIndexAssocType containedEntityStringIdToIndex; + + //Reference to the Entity that this Entity is contained by + Entity *container; + }; + + //pointer 
//pointer to either the container or the EntityRelationships + union EntityRelationshipsReference + { + Entity *container; + EntityRelationships *relationships; + }; + + //current list of all labels and where they are in the code + LabelsAssocType labelIndex; + + //the random stream associated with this Entity + RandomStream randomStream; + + //structure to compactly store parent and contained entities + EntityRelationshipsReference entityRelationships; + + //the ID of this Entity as addressed by its container Entity + //Each entity's ID is a string; since every string is stored in the StringInternPool and referenced by a StringID, the ID is kept here as its interned StringID, hence idStringId + StringInternPool::StringID idStringId; + +#ifdef MULTITHREAD_SUPPORT + //mutex for operations that may edit or modify the entity's properties and attributes + Concurrency::ReadWriteMutex mutex; +#endif + + //if true, then the entity has contained entities and will use the relationships reference of entityRelationships + bool hasContainedEntities; + + //container for when there are no contained entities but need to iterate over them + static std::vector<Entity *> emptyContainedEntities; +}; + +//base class for accessing an entity via a reference +// includes everything that can be accessed via a read operation +// note that this class should not be used directly, which is why it does not yield access to edit the entity other than nullptr +class EntityReferenceBase +{ +public: + constexpr EntityReferenceBase() + : entity(nullptr) + { } + + constexpr EntityReferenceBase(Entity *e) + : entity(e) + { } + + //allow to use as an Entity * + constexpr operator Entity *() + { + return entity; + } + + //allow to check for equality of pointers + constexpr bool operator ==(EntityReferenceBase &other) + { + return entity == other.entity; + } + + //allow to check for inequality of pointers + constexpr bool operator !=(EntityReferenceBase &other) + { + return entity != other.entity; + } + + //allow to use as an Entity * + constexpr Entity *operator->() + { + return entity; + } + +protected: + Entity *entity; +}; + +#ifdef MULTITHREAD_SUPPORT + +//encapsulates EntityReferenceBase with a lock type +template<typename LockType> +class EntityReferenceWithLock : public EntityReferenceBase +{ +public: + EntityReferenceWithLock() : EntityReferenceBase() + { } + + EntityReferenceWithLock(Entity *e) : EntityReferenceBase(e) + { + if(e != nullptr) + lock = e->CreateEntityLock<LockType>(); + } + +protected: + LockType lock; +}; + +//acts as a reference to an Entity that can be treated as an Entity * +// but also performs a read-lock on the entity if multithreaded, and frees the read lock when it goes out of scope +typedef EntityReferenceWithLock<Concurrency::ReadLock> EntityReadReference; + +//acts as a reference to an Entity that can be treated as an Entity * +// but also performs a write-lock on the entity if multithreaded, and frees the write lock when it goes out of scope +typedef EntityReferenceWithLock<Concurrency::WriteLock> EntityWriteReference; + +#else //not MULTITHREAD_SUPPORT + +//acts as a reference to an Entity that can be treated as an Entity * +// but also performs a read-lock on the entity if multithreaded, and frees the read lock when it goes out of scope +typedef EntityReferenceBase EntityReadReference; + +//acts as a reference to an Entity that can be treated as an Entity * +// but also performs a write-lock on the entity if multithreaded, and frees the write lock when it goes out of scope +typedef EntityReferenceBase EntityWriteReference; + +#endif diff --git 
a/src/Amalgam/entity/EntityExternalInterface.cpp b/src/Amalgam/entity/EntityExternalInterface.cpp new file mode 100644 index 00000000..ee6dd946 --- /dev/null +++ b/src/Amalgam/entity/EntityExternalInterface.cpp @@ -0,0 +1,574 @@ +//project headers: +#include "EntityExternalInterface.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "FileSupportJSON.h" + +bool EntityExternalInterface::LoadEntity(std::string &handle, std::string &path, bool persistent, bool load_contained_entities, + std::string &write_log_filename, std::string &print_log_filename, std::string rand_seed) +{ + if(rand_seed == "") + { + typedef std::chrono::steady_clock clk; + auto t = std::chrono::duration_cast(clk::now().time_since_epoch()).count(); + rand_seed = std::to_string(t); + } + + std::string file_type = ""; + Entity *entity = asset_manager.LoadEntityFromResourcePath(path, file_type, persistent, load_contained_entities, false, true, rand_seed); + asset_manager.SetRootPermission(entity, true); + + if(entity == nullptr) + return false; + + PrintListener *pl = nullptr; + std::vector wl; + + if(print_log_filename != "") + pl = new PrintListener(print_log_filename); + + if(write_log_filename != "") + { + EntityWriteListener *write_log = new EntityWriteListener(entity, false, write_log_filename); + wl.push_back(write_log); + } + + AddEntityBundle(handle, new EntityListenerBundle(entity, wl, pl)); + + return true; +} + +void EntityExternalInterface::StoreEntity(std::string &handle, std::string &path, bool update_persistence_location, bool store_contained_entities) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + Entity *entity = bundle->entity; + if(entity == nullptr) + return; + + std::string file_type = ""; + asset_manager.StoreEntityToResourcePath(entity, path, file_type, update_persistence_location, store_contained_entities, false, true, false); +} + +void EntityExternalInterface::ExecuteEntity(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + ExecutionCycleCount max_num_steps = 0, num_steps_executed = 0; + size_t max_num_nodes = 0, num_nodes_allocated = 0; + bundle->entity->Execute(max_num_steps, num_steps_executed, max_num_nodes, num_nodes_allocated, &bundle->writeListeners, bundle->printListener, + nullptr, false, nullptr, + #ifdef MULTITHREAD_SUPPORT + nullptr, + #endif + label); +} + +void EntityExternalInterface::DeleteEntity(std::string &handle) +{ + EraseEntityBundle(handle); +} + +bool EntityExternalInterface::SetRandomSeed(std::string &handle, std::string &rand_seed) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return false; + + if(bundle->entity == nullptr) + return false; + + bundle->entity->SetRandomState(rand_seed, true, &bundle->writeListeners); + return true; +} + +std::vector EntityExternalInterface::GetEntities() +{ + std::vector entities; +#ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock read_lock(mutex); +#endif + + entities.reserve(handleToBundle.size()); + for(auto &[bundle_handle, _] : handleToBundle) + entities.push_back(bundle_handle); + + return entities; +} + +void EntityExternalInterface::AppendToLabel(std::string &handle, std::string &label, double value) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + //get the label + EvaluableNodeReference label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + 
if(EvaluableNode::IsOrderedArray(label_val)) + { + //modify local copy + label_val->AppendOrderedChildNode(bundle->entity->evaluableNodeManager.AllocNode(value)); + + //overwrite the label with the modified copy + bundle->SetEntityValueAtLabel(label, label_val); + } + else + { + // wrap the existing and new element in a list + EvaluableNode list(ENT_LIST); + EvaluableNode initial_value(EvaluableNode::ToNumber(label_val)); + EvaluableNode parsed_input(value); + list.AppendOrderedChildNode(&initial_value); + list.AppendOrderedChildNode(&parsed_input); + + // overwrite the label with the list + EvaluableNodeReference list_reference(&list, false); + bundle->SetEntityValueAtLabel(label, list_reference); + } +} + +void EntityExternalInterface::AppendToLabel(std::string &handle, std::string &label, std::string &value) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNodeReference label_val(bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false), false); + + if(EvaluableNode::IsOrderedArray(label_val)) + { + //modify local copy + label_val->AppendOrderedChildNode(bundle->entity->evaluableNodeManager.AllocNode(ENT_STRING, value)); + + //overwrite the label with the modified copy + bundle->SetEntityValueAtLabel(label, label_val); + } + else //need to transform it into a list + { + //wrap the existing and new element in a list + //can use local stack instead of heap because the entity will copy anyway + EvaluableNode list(ENT_LIST); + EvaluableNode initial_value(ENT_STRING, EvaluableNode::ToString(label_val)); + EvaluableNode parsed_input(ENT_STRING, value); + list.AppendOrderedChildNode(&initial_value); + list.AppendOrderedChildNode(&parsed_input); + + // overwrite the label with the list + EvaluableNodeReference list_reference(&list, false); + bundle->SetEntityValueAtLabel(label, list_reference); + } +} + +void EntityExternalInterface::SetLabel(std::string &handle, std::string &label, double value) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode parsed_input(value); + EvaluableNodeReference parsed_input_reference(&parsed_input, false); + bundle->SetEntityValueAtLabel(label, parsed_input_reference); +} + +void EntityExternalInterface::SetLabel(std::string &handle, std::string &label, std::string &value) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode parsed_input(ENT_STRING, value); + EvaluableNodeReference parsed_input_reference(&parsed_input, false); + bundle->SetEntityValueAtLabel(label, parsed_input_reference); +} + +double EntityExternalInterface::GetNumber(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return std::numeric_limits::quiet_NaN(); + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + //Ensure you grab the return value before releasing resources + double ret = EvaluableNode::ToNumber(label_val); + return ret; +} + +std::string EntityExternalInterface::GetString(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return ""; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + //Ensure you grab the return value before releasing resources + std::string ret = EvaluableNode::ToString(label_val); + return ret; +} + +std::string 
EntityExternalInterface::GetStringFromList(std::string &handle, std::string &label, size_t index) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return ""; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + std::string ret = ""; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + if(index < children.size()) + ret = EvaluableNode::ToString(children[index]); + } + else + { + ret = EvaluableNode::ToString(label_val); + } + + return ret; +} + +// ************************************ +// get, set, and append lists +// ************************************ + +size_t EntityExternalInterface::GetNumberListLength(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return 0; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return 0; + + size_t ret = 1; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + ret = children.size(); + } + + return ret; +} + +void EntityExternalInterface::GetNumberList(std::string &handle, std::string &label, double *out_arr, size_t len) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + size_t min = std::min(children.size(), len); + for(size_t i = 0; i < min; i++) + out_arr[i] = EvaluableNode::ToNumber(children[i]); + } + else + { + out_arr[0] = EvaluableNode::ToNumber(label_val); + } +} + +void EntityExternalInterface::GetNumberList(EvaluableNode *label_val, double *out_arr, size_t len) +{ + if(label_val == nullptr) + return; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + size_t min = std::min(children.size(), len); + for(size_t i = 0; i < min; i++) + out_arr[i] = EvaluableNode::ToNumber(children[i]); + } + else + { + out_arr[0] = EvaluableNode::ToNumber(label_val); + } +} + +size_t EntityExternalInterface::GetNumberMatrixWidth(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return 0; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return 0; + + std::size_t ret = 1; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + ret = children.size(); + } + + return ret; +} + +size_t EntityExternalInterface::GetNumberMatrixHeight(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return 0; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return 0; + + std::size_t ret = 1; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes()[0]->GetOrderedChildNodes(); + ret = children.size(); + } + + return ret; +} + +void EntityExternalInterface::GetNumberMatrix(std::string &handle, std::string &label, double *out_arr, size_t w, size_t h) +{ + auto bundle = 
FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + double *column = new double[h]; + for(size_t x = 0; x < w; x++) + { + GetNumberList(children[x], column, h); + for(size_t y = 0; y < h; y++) + out_arr[x*h + y] = column[y]; + } + delete [] column; + } +} + +EvaluableNode *NodifyNumberList(Entity *entity, double *arr, size_t len) +{ + EvaluableNodeManager *enm = &entity->evaluableNodeManager; + EvaluableNode *list_node = enm->AllocNode(ENT_LIST); + auto &children = list_node->GetOrderedChildNodes(); + children.resize(len); + for(size_t i = 0; i < len; i++) + children[i] = enm->AllocNode(arr[i]); + + return list_node; +} + +EvaluableNode *NodifyNumberMatrix(Entity *entity, double *arr, size_t w, size_t h) +{ + EvaluableNodeManager *enm = &entity->evaluableNodeManager; + EvaluableNode *matrix_node = enm->AllocNode(ENT_LIST); + + auto &children = matrix_node->GetOrderedChildNodes(); + children.resize(w); + for(size_t x = 0; x < w; x++) + { + double *column = new double[h]; + for(size_t y = 0; y < h; y++) + column[y] = arr[x*h + y]; + + children[x] = NodifyNumberList(entity, column, h); + delete [] column; + } + + return matrix_node; +} + +void EntityExternalInterface::SetNumberList(std::string &handle, std::string &label, double *arr, size_t len) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *list_node = NodifyNumberList(bundle->entity, arr, len); + EvaluableNodeReference list_node_reference(list_node, true); + bundle->SetEntityValueAtLabel(label, list_node_reference); +} + +void EntityExternalInterface::SetNumberMatrix(std::string &handle, std::string &label, double *arr, size_t w, size_t h) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *list_node = NodifyNumberMatrix(bundle->entity, arr, w, h); + EvaluableNodeReference list_node_reference(list_node, true); + bundle->SetEntityValueAtLabel(label, list_node_reference); +} + +void EntityExternalInterface::AppendNumberList(std::string &handle, std::string &label, double *arr, size_t len) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *list_node = NodifyNumberList(bundle->entity, arr, len); + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + label_val->AppendOrderedChildNode(list_node); +} + +size_t EntityExternalInterface::GetStringListLength(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return 0; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return 0; + + if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + return children.size(); + } + + return 1; +} + +void EntityExternalInterface::GetStringList(std::string &handle, std::string &label, std::string *out_arr, size_t len) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, &bundle->entity->evaluableNodeManager, false); + + if(label_val == nullptr) + return; + + 
if(EvaluableNode::IsOrderedArray(label_val)) + { + auto &children = label_val->GetOrderedChildNodes(); + size_t min = std::min(children.size(), len); + for(size_t i = 0; i < min; i++) + out_arr[i] = EvaluableNode::ToString(children[i]); + } + else + { + out_arr[0] = EvaluableNode::ToString(label_val); + } +} + +EvaluableNode *NodifyStringList(Entity *entity, char **arr, size_t len) +{ + EvaluableNodeManager *enm = &entity->evaluableNodeManager; + EvaluableNode *list_node = enm->AllocNode(ENT_LIST); + auto &children = list_node->GetOrderedChildNodes(); + children.resize(len); + for(size_t i = 0; i < len; i++) + children[i] = enm->AllocNode(ENT_STRING, arr[i]); + + return list_node; +} + +void EntityExternalInterface::SetStringList(std::string &handle, std::string &label, char **arr, size_t len) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return; + + EvaluableNode *list_node = NodifyStringList(bundle->entity, arr, len); + EvaluableNodeReference list_node_reference(list_node, true); + bundle->SetEntityValueAtLabel(label, list_node_reference); +} + +bool EntityExternalInterface::SetJSONToLabel(std::string &handle, std::string &label, std::string_view json) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return false; + + EvaluableNode *node = EvaluableNodeJSONTranslation::JsonToEvaluableNode(&bundle->entity->evaluableNodeManager, json); + EvaluableNodeReference node_reference(node, true); + bool success = bundle->SetEntityValueAtLabel(label, node_reference); + return success; +} + +std::string EntityExternalInterface::GetJSONFromLabel(std::string &handle, std::string &label) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return ""; + + EvaluableNode *label_val = bundle->entity->GetValueAtLabel(label, nullptr, false); + return EvaluableNodeJSONTranslation::EvaluableNodeToJson(label_val); +} + +std::string EntityExternalInterface::ExecuteEntityJSON(std::string &handle, std::string &label, std::string_view json) +{ + auto bundle = FindEntityBundle(handle); + if(bundle == nullptr) + return ""; + + EvaluableNodeManager *enm = &bundle->entity->evaluableNodeManager; + EvaluableNodeReference args = EvaluableNodeReference(EvaluableNodeJSONTranslation::JsonToEvaluableNode(enm, json), true); + + auto call_stack = Interpreter::ConvertArgsToCallStack(args, enm); + + ExecutionCycleCount max_num_steps = 0, num_steps_executed = 0; + size_t max_num_nodes = 0, num_nodes_allocated = 0; + EvaluableNodeReference returned_value = bundle->entity->Execute(max_num_steps, num_steps_executed, max_num_nodes, + num_nodes_allocated, &bundle->writeListeners, bundle->printListener, call_stack, false, enm, + #ifdef MULTITHREAD_SUPPORT + nullptr, + #endif + label); + + //ConvertArgsToCallStack always adds an outer list that is safe to free + enm->FreeNode(call_stack); + + std::string result = EvaluableNodeJSONTranslation::EvaluableNodeToJson(returned_value); + enm->FreeNodeTreeIfPossible(returned_value); + return result; +} + +bool EntityExternalInterface::EntityListenerBundle::SetEntityValueAtLabel(std::string &label_name, EvaluableNodeReference new_value) +{ + StringInternPool::StringID label_sid = string_intern_pool.GetIDFromString(label_name); + +#ifdef MULTITHREAD_SUPPORT + auto write_lock = entity->CreateEntityLock(); + entity->SetRoot(entity->GetRoot(), false); +#endif + + bool success = entity->SetValueAtLabel(label_sid, new_value, false, &writeListeners); + + entity->evaluableNodeManager.FreeNodeTreeIfPossible(new_value); + + return success; +} 
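A minimal usage sketch of the interface implemented above, assuming an entity file and label that expose an executable routine; the handle, file name, and label names are hypothetical placeholders, and error handling is reduced to early returns. It loads an entity under a handle, executes a label with JSON arguments, and cleans up:

	#include "EntityExternalInterface.h"
	#include <iostream>
	#include <string>

	int main()
	{
		EntityExternalInterface iface;

		//names below are hypothetical; LoadEntity registers the entity under the handle
		std::string handle = "example";
		std::string path = "model.amlg";
		std::string write_log;	//empty string: no write log listener
		std::string print_log;	//empty string: no print listener

		if(!iface.LoadEntity(handle, path, false, true, write_log, print_log))
			return 1;

		//execute a label with JSON arguments and print the JSON result
		std::string label = "run";
		std::cout << iface.ExecuteEntityJSON(handle, label, "{\"x\": 1}") << std::endl;

		iface.DeleteEntity(handle);
		return 0;
	}

The handle-based methods (SetLabel, GetNumber, SetJSONToLabel, and so on) follow the same pattern: look up the bundle for the handle, then read or write the value stored at the named label.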
diff --git a/src/Amalgam/entity/EntityExternalInterface.h b/src/Amalgam/entity/EntityExternalInterface.h new file mode 100644 index 00000000..0c39ef4f --- /dev/null +++ b/src/Amalgam/entity/EntityExternalInterface.h @@ -0,0 +1,242 @@ +#pragma once + +//project headers: +#include "AssetManager.h" +#include "Entity.h" +#include "EntityWriteListener.h" +#include "HashMaps.h" +#include "PrintListener.h" + +//system headers: +#include +#include + +/* + * This class constitutes the C++ backing for the C API, and is fully functional as a C++ API. + * + * Amalgam functions through the use of "Entities" which will have a predetermined set of "labels". + * Loading an .amlg file with the LoadEntity command will assign the entity to a given handle. + * The majority of the methods provided here allow manipulation of data associated with a label within an entity. + * Some labels will be loaded with functions which can be executed (refer to the instructions for the entity you loaded). + */ + +class EntityExternalInterface +{ +public: + bool LoadEntity(std::string &handle, std::string &path, bool persistent, bool load_contained_entities, + std::string &write_log_filename, std::string &print_log_filename, std::string rand_seed = std::string("")); + void StoreEntity(std::string &handle, std::string &path, bool update_persistence_location, bool store_contained_entities); + void ExecuteEntity(std::string &handle, std::string &label); + void DeleteEntity(std::string &handle); + bool SetRandomSeed(std::string &handle, std::string &rand_seed); + std::vector GetEntities(); + + void AppendToLabel(std::string &handle, std::string &label, double value); + void AppendToLabel(std::string &handle, std::string &label, std::string &value); + + void SetLabel(std::string &handle, std::string &label, double value); + void SetLabel(std::string &handle, std::string &label, std::string &value); + + double GetNumber(std::string &handle, std::string &label); + std::string GetString(std::string &handle, std::string &label); + std::string GetStringFromList(std::string &handle, std::string &label, size_t index); + + size_t GetNumberListLength(std::string &handle, std::string &label); + void GetNumberList(std::string &handle, std::string &label, double *out_arr, size_t len); + void GetNumberList(EvaluableNode *label_val, double *out_arr, size_t len); + void SetNumberList(std::string &handle, std::string &label, double *arr, size_t len); + void AppendNumberList(std::string &handle, std::string &label, double *arr, size_t len); + + size_t GetNumberMatrixWidth(std::string &handle, std::string &label); + size_t GetNumberMatrixHeight(std::string &handle, std::string &label); + void GetNumberMatrix(std::string &handle, std::string &label, double *out_arr, size_t w, size_t h); + void SetNumberMatrix(std::string &handle, std::string &label, double *arr, size_t w, size_t h); + + size_t GetStringListLength(std::string &handle, std::string &label); + void GetStringList(std::string &handle, std::string &label, std::string *out_arr, size_t len); + void SetStringList(std::string &handle, std::string &label, char **arr, size_t len); + + bool SetJSONToLabel(std::string &handle, std::string &label, std::string_view json); + std::string GetJSONFromLabel(std::string &handle, std::string &label); + std::string ExecuteEntityJSON(std::string &handle, std::string &label, std::string_view json); + +protected: + + //a class that manages the entity + // when the bundle is destroyed, everything in it is also destroyed + class EntityListenerBundle + { + 
public: + EntityListenerBundle(Entity *ent, std::vector wl, PrintListener *pl = nullptr) + { + entity = ent; + writeListeners = wl; + printListener = pl; + } + + ~EntityListenerBundle() + { + if(entity != nullptr) + { + asset_manager.DestroyEntity(entity); + delete entity; + } + + if(printListener != nullptr) + delete printListener; + if(writeListeners.size() > 0 && writeListeners[0] != nullptr) + delete writeListeners[0]; + } + + //Wraps around Entity::SetValueAtLabel but accepts a string for label name + bool SetEntityValueAtLabel(std::string &label_name, EvaluableNodeReference new_value); + + //the type of mutex is dependent on whether individual entities can be accessed concurrently + #ifdef MULTITHREAD_INTERFACE + #ifdef MULTITHREAD_ENTITY_CALL_MUTEX + Concurrency::SingleMutex mutex; + #else + Concurrency::ReadWriteMutex mutex; + #endif + #endif + + Entity *entity; + std::vector writeListeners; + PrintListener *printListener; + }; + + class EntityListenerBundleReadReference + { + public: + EntityListenerBundleReadReference(EntityListenerBundle *entity_listener_bundle) + { + entityListenerBundle = entity_listener_bundle; + + #ifdef MULTITHREAD_INTERFACE + if(entityListenerBundle != nullptr) + { + #ifdef MULTITHREAD_ENTITY_CALL_MUTEX + lock = Concurrency::SingleLock(entityListenerBundle->mutex); + #else + readLock = Concurrency::ReadLock(entityListenerBundle->mutex); + #endif + } + #endif + } + + //allow to use as an EntityListenerBundle * + constexpr operator EntityListenerBundle *() + { return entityListenerBundle; } + + //allow to use as an EntityListenerBundle * + constexpr EntityListenerBundle *operator->() + { return entityListenerBundle; } + + EntityListenerBundle *entityListenerBundle; + + //the type of mutex is dependent on whether individual entities can be accessed concurrently + #ifdef MULTITHREAD_INTERFACE + #ifdef MULTITHREAD_ENTITY_CALL_MUTEX + Concurrency::SingleLock lock; + #else + Concurrency::ReadLock readLock; + #endif + #endif + }; + + class EntityListenerBundleWriteReference + { + public: + EntityListenerBundleWriteReference(EntityListenerBundle *entity_listener_bundle) + { + entityListenerBundle = entity_listener_bundle; + + #ifdef MULTITHREAD_INTERFACE + if(entityListenerBundle != nullptr) + { + #ifdef MULTITHREAD_ENTITY_CALL_MUTEX + lock = Concurrency::SingleLock(entityListenerBundle->mutex); + #else + writeLock = Concurrency::WriteLock(entityListenerBundle->mutex); + #endif + } + #endif + } + + //allow to use as an EntityListenerBundle * + constexpr operator EntityListenerBundle *() + { return entityListenerBundle; } + + //allow to use as an EntityListenerBundle * + constexpr EntityListenerBundle *operator->() + { return entityListenerBundle; } + + EntityListenerBundle *entityListenerBundle; + + //the type of mutex is dependent on whether individual entities can be accessed concurrently + #ifdef MULTITHREAD_INTERFACE + #ifdef MULTITHREAD_ENTITY_CALL_MUTEX + Concurrency::SingleLock lock; + #else + Concurrency::WriteLock writeLock; + #endif + #endif + }; + + //looks up the bundle and returns it, will return nullptr if not found + inline EntityListenerBundleReadReference FindEntityBundle(std::string &handle) + { + #ifdef MULTITHREAD_INTERFACE + Concurrency::ReadLock read_lock(mutex); + #endif + + auto bundle_handle = handleToBundle.find(handle); + if(bundle_handle == end(handleToBundle) || bundle_handle->second == nullptr) + return nullptr; + + return EntityListenerBundleReadReference(bundle_handle->second); + } + + //adds a new bundle under the name handle + // 
will delete any if it already exists + inline void AddEntityBundle(std::string &handle, EntityListenerBundle *bundle) + { + #ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock write_lock(mutex); + #endif + + const auto &[bundle_handle, bundle_inserted] = handleToBundle.emplace(handle, bundle); + if(!bundle_inserted) + { + //erase the previous + if(bundle_handle->second != nullptr) + delete bundle_handle->second; + + //overwrite + bundle_handle->second = bundle; + } + } + + //erases the handle and deletes its bundle; does nothing if the handle is not found + inline void EraseEntityBundle(std::string &handle) + { + #ifdef MULTITHREAD_INTERFACE + Concurrency::WriteLock write_lock(mutex); + #endif + + auto bundle_handle = handleToBundle.find(handle); + if(bundle_handle == end(handleToBundle) || bundle_handle->second == nullptr) + return; + + //delete the bundle before erasing, since erasing invalidates the iterator + delete bundle_handle->second; + handleToBundle.erase(bundle_handle); + } + + //for concurrent reading and writing the interface management data below +#ifdef MULTITHREAD_INTERFACE + Concurrency::ReadWriteMutex mutex; +#endif + + //map between entity name and the bundle of the entity and its listeners, etc. + FastHashMap<std::string, EntityListenerBundle *> handleToBundle; +}; diff --git a/src/Amalgam/entity/EntityManipulation.cpp b/src/Amalgam/entity/EntityManipulation.cpp new file mode 100644 index 00000000..1f1d2aab --- /dev/null +++ b/src/Amalgam/entity/EntityManipulation.cpp @@ -0,0 +1,777 @@ +//project headers: +#include "EntityManipulation.h" + +#include "Entity.h" +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "Merger.h" + +Entity *EntityManipulation::EntitiesMergeMethod::MergeValues(Entity *a, Entity *b, bool must_merge) +{ + if(a == nullptr && b == nullptr) + return nullptr; + + //create new entity + Entity *merged_entity = new Entity(); + if(a != nullptr) + merged_entity->SetRandomStream(a->GetRandomStream()); + else if(b != nullptr) + merged_entity->SetRandomStream(b->GetRandomStream()); + + //merge the entities' code + EvaluableNode *code_a = (a != nullptr ? a->GetRoot().reference : nullptr); + EvaluableNode *code_b = (b != nullptr ? 
b->GetRoot().reference : nullptr); + + if(a != nullptr) + aEntitiesIncludedFromB[b] = a; + if(b != nullptr) + { + bool identical_code = EvaluableNode::AreDeepEqual(code_a, code_b); + mergedEntitiesIncludedFromB[b] = std::pair(result, identical_code); + } + + MergeContainedEntities(this, a, b, result); + + return result; +} + +////////////////////////////// + +EntityManipulation::EntitiesMixMethod::EntitiesMixMethod(Interpreter *_interpreter, + double fraction_a, double fraction_b, double similar_mix_chance, double fraction_entities_to_mix) + : EntitiesMergeMethod(_interpreter, true) +{ + interpreter = _interpreter; + + //clamp each to the appropriate range, 0 to 1 for fractions, -1 to 1 for similarMixChance + if(FastIsNaN(fraction_a)) + fractionA = 0.0; + else + fractionA = std::min(1.0, std::max(0.0, fraction_a)); + + if(FastIsNaN(fraction_b)) + fractionB = 0.0; + else + fractionB = std::min(1.0, std::max(0.0, fraction_b)); + + fractionAOrB = fractionA + fractionB - fractionA * fractionB; + fractionAInsteadOfB = fractionA / (fractionA + fractionB); + + if(FastIsNaN(similar_mix_chance)) + similarMixChance = 0.0; + else + similarMixChance = std::min(1.0, std::max(-1.0, similar_mix_chance)); + + if(FastIsNaN(fraction_entities_to_mix)) + fractionEntitiesToMix = 0.0; + else + fractionEntitiesToMix = std::min(1.0, std::max(0.0, fraction_entities_to_mix)); +} + +Entity *EntityManipulation::EntitiesMixMethod::MergeValues(Entity *a, Entity *b, bool must_merge) +{ + if(a == nullptr && b == nullptr) + return nullptr; + + //if the entities aren't required to be merged, then see if they're mergeable + // if so, then merge + // if not, then pick one or none + if(!must_merge) + { + if(!AreMergeable(a, b)) + { + if(KeepNonMergeableValue()) + { + if(KeepNonMergeableAInsteadOfB()) + return new Entity(a); + else + return new Entity(b); + } + + return nullptr; + } + } + + //create new entity to merge into + Entity *merged_entity = new Entity(); + if(a != nullptr) + merged_entity->SetRandomStream(a->GetRandomStream()); + else if(b != nullptr) + merged_entity->SetRandomStream(b->GetRandomStream()); + + //merge entity's code + EvaluableNode *code_a = (a != nullptr ? a->GetRoot().reference : nullptr); + EvaluableNode *code_b = (b != nullptr ? 
b->GetRoot().reference : nullptr); + + EvaluableNodeTreeManipulation::NodesMixMethod mm(interpreter->randomStream.CreateOtherStreamViaRand(), + &merged_entity->evaluableNodeManager, fractionA, fractionB, similarMixChance); + + EvaluableNode *result = mm.MergeValues(code_a, code_b); + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + merged_entity->SetRoot(result, true); + + MergeContainedEntities(this, a, b, merged_entity); + return merged_entity; +} + +Entity *EntityManipulation::IntersectEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2) +{ + EntitiesMergeMethod mm(interpreter, false); + return mm.MergeValues(entity1, entity2); +} + +Entity *EntityManipulation::UnionEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2) +{ + EntitiesMergeMethod mm(interpreter, true); + return mm.MergeValues(entity1, entity2); +} + +//returns true if root_entity can be deep copied because all contained entities (recursively) are identical to those matched in entities_included +// regardless, it will accumulate contained entities examined into top_entities_identical if can be deep copied and different_entities otherwise +bool IsEntityIdenticalToComparedEntity(Entity *root_entity, CompactHashMap> &entities_included, std::vector &top_entities_identical, std::vector &different_entities) +{ + if(root_entity == nullptr) + return true; + + //if not included, then don't mark this entity for copying at all + auto paired_entity = entities_included.find(root_entity); + if(paired_entity == end(entities_included) || paired_entity->second.first == nullptr) + return false; + + //iterate over all contained entries and recursively check if they are identical, if so, record in a list + std::vector contained_nodes_identical; + bool all_contained_entities_identical = true; + for(auto entity : root_entity->GetContainedEntities()) + { + if(IsEntityIdenticalToComparedEntity(entity, entities_included, top_entities_identical, different_entities)) + contained_nodes_identical.emplace_back(entity); + else + { + all_contained_entities_identical = false; + different_entities.emplace_back(entity); + } + } + + //if the root_entity matches its pair, then can deep copy + if(paired_entity->second.second && all_contained_entities_identical) + return true; + else //something doesn't match, only copy those that are identical, different_entities will contain those entities + { + for(auto &ce : contained_nodes_identical) + top_entities_identical.emplace_back(ce); + return false; + } +} + +EvaluableNodeReference EntityManipulation::DifferenceEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2) +{ + //find commonality + EntitiesMergeForDifferenceMethod mm(interpreter); + Entity *root_merged = mm.MergeValues(entity1, entity2, true); + auto &entity2_to_entity_a = mm.GetAEntitiesIncludedFromB(); + auto &entity2_to_merged_entity = mm.GetMergedEntitiesIncludedFromB(); + + EvaluableNodeManager *enm = interpreter->evaluableNodeManager; + + ////////// + //build code to look like: + // (declare (assoc _ null) + // (let (assoc new_entity (create_entity + // (call (lambda *entity difference code*) + // (assoc _ (get_entity_code _) ) + // ) ) + // + // [for each contained entity specified by the list representing the relative location to _ and new_entity] + // + // [if must be deleted, ignore] + // + // [if must be merged] + // (create_entity + // (append _ *relative id*) + // (call *entity difference code* + // (assoc _ (get_entity_code (append new_entity *relative id*) ) ) + // ) + // + // [if must be 
created] + // (clone_entity + // (append _ *relative id*) + // (append new_entity *relative id*) + // ) + // + // new_entity + // ) + // ) + + //create: (declare (assoc _ null) ) + EvaluableNode *difference_function = enm->AllocNode(ENT_DECLARE); + + auto node_stack = interpreter->CreateInterpreterNodeStackStateSaver(difference_function); + + EvaluableNode *df_assoc = enm->AllocNode(ENT_ASSOC); + difference_function->AppendOrderedChildNode(df_assoc); + df_assoc->SetMappedChildNode(ENBISI__, enm->AllocNode(ENT_NULL)); + + //find entities that match up, and if no difference, then can shortcut the function + std::vector top_entities_identical; + std::vector different_entities; + if(IsEntityIdenticalToComparedEntity(entity2, entity2_to_merged_entity, top_entities_identical, different_entities)) + { + EvaluableNode *clone_entity = enm->AllocNode(ENT_CLONE_ENTITIES); + difference_function->AppendOrderedChildNode(clone_entity); + clone_entity->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI__)); + delete root_merged; + return EvaluableNodeReference(difference_function, true); + } + + //create the following: + // (declare (assoc _ null) + // (let (assoc new_entity (first (create_entities)) ) ) + // ) + EvaluableNode *let_new_entity = enm->AllocNode(ENT_LET); + difference_function->AppendOrderedChildNode(let_new_entity); + EvaluableNode *let_assoc = enm->AllocNode(ENT_ASSOC); + let_new_entity->AppendOrderedChildNode(let_assoc); + EvaluableNode *create_root_entity = enm->AllocNode(ENT_CREATE_ENTITIES); + EvaluableNode *first_of_create_entity = enm->AllocNode(ENT_FIRST); + first_of_create_entity->AppendOrderedChildNode(create_root_entity); + let_assoc->SetMappedChildNode(ENBISI_new_entity, first_of_create_entity); + + //apply difference in code from source to build: + // (declare (assoc _ null) + // (let (assoc new_entity (first (create_entities + // (call (lambda *entity difference code*) + // (assoc _ (get_entity_code _) ) + // ) ) ) + EvaluableNode *entity_difference_apply_call = enm->AllocNode(ENT_CALL); + create_root_entity->AppendOrderedChildNode(entity_difference_apply_call); + EvaluableNode *lambda_for_difference = enm->AllocNode(ENT_LAMBDA); + entity_difference_apply_call->AppendOrderedChildNode(lambda_for_difference); + EvaluableNode *edac_assoc = enm->AllocNode(ENT_ASSOC); + entity_difference_apply_call->AppendOrderedChildNode(edac_assoc); + EvaluableNode *get_entity_code = enm->AllocNode(ENT_RETRIEVE_ENTITY_ROOT); + edac_assoc->SetMappedChildNode(ENBISI__, get_entity_code); + get_entity_code->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI__)); + + //apply difference function for root entities + // make sure to make a copy of each root so don't end up with mixed entity nodes + lambda_for_difference->AppendOrderedChildNode(EvaluableNodeTreeDifference::DifferenceTrees(enm, + entity1->GetRoot(enm), entity2->GetRoot(enm))); + + //can ensure cycle free only if all different entities are cycle free + // it doesn't matter if identical entities are cycle free because they're just cloned -- the code doesn't show up in the difference + bool cycle_free = true; + for(auto &entity_to_create : different_entities) + { + //create the following code: + // (create_entities + // (append _ *relative id*) + // (call *entity difference code* + // (assoc _ (get_entity_code (append new_entity *relative id*)) ) + // ) + EvaluableNode *src_id_list = GetTraversalIDPathListFromAToB(enm, entity2, entity_to_create); + EvaluableNode *src_append = enm->AllocNode(ENT_APPEND); + 
src_append->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI__)); + src_append->AppendOrderedChildNode(src_id_list); + + EvaluableNode *dest_id_list = enm->DeepAllocCopy(src_id_list); + EvaluableNode *dest_append = enm->AllocNode(ENT_APPEND); + dest_append->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + dest_append->AppendOrderedChildNode(dest_id_list); + + EvaluableNode *create_entity = enm->AllocNode(ENT_CREATE_ENTITIES); + let_new_entity->AppendOrderedChildNode(create_entity); + create_entity->AppendOrderedChildNode(dest_append); + + //if identical to merged, then just copy + auto merged = entity2_to_merged_entity.find(entity_to_create); + if(merged == end(entity2_to_merged_entity) || merged->second.second == true) + { + EvaluableNode *copy_lambda = enm->AllocNode(ENT_LAMBDA); + create_entity->AppendOrderedChildNode(copy_lambda); + copy_lambda->AppendOrderedChildNode(enm->DeepAllocCopy(entity_to_create->GetRoot(), EvaluableNodeManager::ENMM_LABEL_ESCAPE_INCREMENT)); + } + else //need to difference + { + EvaluableNode *call_diff = enm->AllocNode(ENT_CALL); + create_entity->AppendOrderedChildNode(call_diff); + EvaluableNode *call_lambda = enm->AllocNode(ENT_LAMBDA); + call_diff->AppendOrderedChildNode(call_lambda); + + //look up corresponding entity from a, then grab its code + // make sure to make a copy of each root so don't end up with mixed entity nodes + auto entity_from_a = entity2_to_entity_a.find(entity_to_create); + EvaluableNode *a_code = nullptr; + if(entity_from_a != end(entity2_to_entity_a) && entity_from_a->second != nullptr) + a_code = entity_from_a->second->GetRoot(enm); + + EvaluableNode *b_code = entity_to_create->GetRoot(enm); + + //if either entity needs a cycle check, then everything will need to be checked for cycles later + if( (a_code != nullptr && a_code->GetNeedCycleCheck()) + || (b_code != nullptr && b_code->GetNeedCycleCheck()) ) + cycle_free = false; + + EvaluableNode *entity_difference = EvaluableNodeTreeDifference::DifferenceTrees(enm, a_code, b_code); + call_lambda->AppendOrderedChildNode(entity_difference); + + EvaluableNode *call_assoc = enm->AllocNode(ENT_ASSOC); + call_diff->AppendOrderedChildNode(call_assoc); + + EvaluableNode *entity_code = enm->AllocNode(ENT_RETRIEVE_ENTITY_ROOT); + call_assoc->SetMappedChildNode(ENBISI__, entity_code); + entity_code->AppendOrderedChildNode(src_append); + } + } + + //clone any identical parts. 
since they are effectively leaf nodes they can be all created at the end + for(auto &entity_to_clone : top_entities_identical) + { + //create the following code: + // (clone_entities + // (append _ *relative id*) + // (append new_entity *relative id*) + // ) + EvaluableNode *clone_entity = enm->AllocNode(ENT_CLONE_ENTITIES); + let_new_entity->AppendOrderedChildNode(clone_entity); + + EvaluableNode *src_id_list = GetTraversalIDPathListFromAToB(enm, entity2, entity_to_clone); + EvaluableNode *src_append = enm->AllocNode(ENT_APPEND); + src_append->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI__)); + src_append->AppendOrderedChildNode(src_id_list); + + EvaluableNode *dest_id_list = enm->DeepAllocCopy(src_id_list); + EvaluableNode *dest_append = enm->AllocNode(ENT_APPEND); + dest_append->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + dest_append->AppendOrderedChildNode(dest_id_list); + + clone_entity->AppendOrderedChildNode(src_append); + clone_entity->AppendOrderedChildNode(dest_append); + } + + //add new_entity to return value of let statement to return the newly created id + let_new_entity->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + + delete root_merged; + + //if anything isn't cycle free, then need to recompute everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(difference_function); + + return EvaluableNodeReference(difference_function, true); +} + +Entity *EntityManipulation::MixEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2, + double fractionA, double fractionB, double similar_mix_chance, double fraction_entities_to_mix) +{ + EntitiesMixMethod mm(interpreter, fractionA, fractionB, similar_mix_chance, fraction_entities_to_mix); + return mm.MergeValues(entity1, entity2, true); +} + +MergeMetricResults EntityManipulation::NumberOfSharedNodes(Entity *entity1, Entity *entity2) +{ + if(entity1 == nullptr || entity2 == nullptr) + return MergeMetricResults(0.0, entity1, entity2, false, false); + + MergeMetricResults commonality(0.0, entity1, entity2); + commonality += EvaluableNodeTreeManipulation::NumberOfSharedNodes(entity1->GetRoot(), entity2->GetRoot()); + + Entity::EntityLookupAssocType entity1_unmatched = CreateContainedEntityLookupByStringId(entity1); + Entity::EntityLookupAssocType entity2_unmatched = CreateContainedEntityLookupByStringId(entity2); + + //find all contained entities that have the same name + std::vector matching_entities(entity1_unmatched.size()); //reserve enough in one block for all in entity1, as an upper bound + for(auto &[e1c_id, _] : entity1_unmatched) + { + if(entity2_unmatched.find(e1c_id) != end(entity2_unmatched)) + matching_entities.emplace_back(e1c_id); + } + + //count up all shared entities and remove from unmatched maps + for(auto &entity_name : matching_entities) + { + commonality += NumberOfSharedNodes(entity1_unmatched[entity_name], entity2_unmatched[entity_name]); + + entity1_unmatched.erase(entity_name); + entity2_unmatched.erase(entity_name); + } + + //pair up all remaining contained entities that don't have matching names + for(auto &[e1c_id, e1c] : entity1_unmatched) + { + //find the node that best matches this one, greedily + bool best_match_found = false; + StringInternPool::StringID best_match_key = StringInternPool::NOT_A_STRING_ID; + MergeMetricResults best_match_value; + for(auto& [e2c_id, e2c] : entity2_unmatched) + { + auto match_value = NumberOfSharedNodes(e1c, e2c); + //entities won't necessarily must-match even if the labels are 
the same; those are the matching_entities by name covered above + match_value.mustMatch = false; + + if(match_value.IsNontrivialMatch() + && (!best_match_found || match_value > best_match_value) ) + { + best_match_found = true; + best_match_value = match_value; + best_match_key = e2c_id; + + //don't need to check any more + if(match_value.mustMatch) + break; + } + } + + //if found a match, then remove it from the match list and put it in the list + if(best_match_found) + { + //count this for whatever match it is + commonality += best_match_value; + + entity2_unmatched.erase(best_match_key); + } + + } + + return commonality; +} + +double EntityManipulation::EditDistance(Entity *entity1, Entity *entity2) +{ + auto shared_nodes = NumberOfSharedNodes(entity1, entity2); + + double entity_1_size = 0; + if(entity1 != nullptr) + entity_1_size = static_cast(entity1->GetDeepSizeInNodes()); + double entity_2_size = 0; + if(entity2 != nullptr) + entity_2_size = static_cast(entity2->GetDeepSizeInNodes()); + + //find the distance to edit from tree1 to shared, then from shared to tree_2. Shared is the smallest, so subtract from each. + return (entity_1_size - shared_nodes.commonality) + (entity_2_size - shared_nodes.commonality); +} + +void EntityManipulation::MergeContainedEntities(EntitiesMergeMethod *mm, Entity *entity1, Entity *entity2, Entity *merged_entity) +{ + //shortcut for merging empty entities + if(entity1 == nullptr && entity2 == nullptr) + return; + + //shortcut for when requiring intersection of entities + if(!mm->KeepSomeNonMergeableValues() && (entity1 == nullptr || entity2 == nullptr)) + return; + + //any entity that is renamed that may have references is stored here + CompactHashMap entities_renamed; + + //keep track of contained entities to merge + Entity::EntityLookupAssocType entity1_unmatched = CreateContainedEntityLookupByStringId(entity1); + Entity::EntityLookupAssocType entity2_unmatched = CreateContainedEntityLookupByStringId(entity2); + + //find all contained entities that have the same id + std::vector matching_entities; + matching_entities.reserve(entity1_unmatched.size()); //reserve enough in one block for all in entity1 to reduce potential reallocations + for(auto &[_, e1c] : entity1_unmatched) + { + StringInternPool::StringID e1c_id = e1c->GetIdStringId(); + if(entity2_unmatched.find(e1c_id) != end(entity2_unmatched)) + matching_entities.emplace_back(e1c_id); + } + + //merge all shared entities and remove from unmatched contained entities + for(auto &entity_name : matching_entities) + { + merged_entity->AddContainedEntity(mm->MergeValues(entity1_unmatched[entity_name], entity2_unmatched[entity_name], true), entity_name); + entity1_unmatched.erase(entity_name); + entity2_unmatched.erase(entity_name); + } + + //entityX_unmatched only contain entries that do not have matching names + //If mm->KeepAllNonMergeableValues(), then merge named entities against nulls + // Regardless, keep the rest to match up as best as possible + Entity::EntityLookupAssocType entity1_unmatched_unnamed; + Entity::EntityLookupAssocType entity2_unmatched_unnamed; + + for(auto &e : entity1_unmatched) + { + if(Entity::IsNamedEntity(e.first)) + { + Entity *merged = mm->MergeValues(e.second, nullptr, true); + if(merged != nullptr) + merged_entity->AddContainedEntity(merged, e.first); + } + else + entity1_unmatched_unnamed.insert(e); + } + + for(auto &e : entity2_unmatched) + { + if(Entity::IsNamedEntity(e.first)) + { + Entity *merged = mm->MergeValues(nullptr, e.second, true); + if(merged != nullptr) + 
merged_entity->AddContainedEntity(merged, e.first); + } + else + entity2_unmatched_unnamed.insert(e); + } + + + //merge any remaining entities that didn't have anything to merge with + for(auto &[e1_current_id, e1_current] : entity1_unmatched_unnamed) + { + //find the entity that best matches this one, greedily + bool best_match_found = false; + StringInternPool::StringID best_match_key = StringInternPool::NOT_A_STRING_ID; + MergeMetricResults best_match_value; + for(auto &[e2_current_id, e2_current] : entity2_unmatched_unnamed) + { + auto match_value = NumberOfSharedNodes(e1_current, e2_current); + + if(match_value.IsNontrivialMatch() + && (!best_match_found || match_value > best_match_value) ) + { + best_match_found = true; + best_match_value = match_value; + best_match_key = e2_current_id; + + //have already merged all values that match by name, so if this is an exact match so count it + // to reduce the number of total of comparisons needed + if(best_match_value.exactMatch) + break; + } + } + + //if found a match, then remove it from the match list and put it in the list + if(best_match_found) + { + Entity *merged = mm->MergeValues(e1_current, entity2_unmatched_unnamed[best_match_key], best_match_value.exactMatch); + //only count if it worked + if(merged != nullptr) + { + merged_entity->AddContainedEntity(merged, e1_current_id); //add using id of first to attempt to preserve any references + entities_renamed[best_match_key] = e1_current_id; //remember the replacement + + //merged, so remove from potential merge list + entity2_unmatched_unnamed.erase(best_match_key); + } + } + else //nothing found, merge versus nullptr + { + Entity *merged = mm->MergeValues(e1_current, nullptr, false); + if(merged != nullptr) + merged_entity->AddContainedEntity(merged, e1_current_id); + } + } + + if(mm->KeepAllNonMergeableValues()) + { + //merge anything remaining from entity2_unmatched_unnamed versus nullptr + for(auto &[e_id, e] : entity2_unmatched_unnamed) + { + Entity *merged = mm->MergeValues(nullptr, e, false); + if(merged != nullptr) + merged_entity->AddContainedEntity(merged, e_id); + } + } + + if(entities_renamed.size() > 0) + RecursivelyRenameAllEntityReferences(merged_entity, entities_renamed); +} + +Entity *EntityManipulation::MutateEntity(Interpreter *interpreter, Entity *entity, double mutation_rate, CompactHashMap *mutation_weights, CompactHashMap *operation_type) +{ + if(entity == nullptr) + return nullptr; + + //make a new entity with mutated code + Entity *new_entity = new Entity(); + EvaluableNode *mutated_code = EvaluableNodeTreeManipulation::MutateTree(interpreter, &new_entity->evaluableNodeManager, entity->GetRoot(), mutation_rate, mutation_weights, operation_type); + EvaluableNodeManager::UpdateFlagsForNodeTree(mutated_code); + new_entity->SetRoot(mutated_code, true); + new_entity->SetRandomStream(entity->GetRandomStream()); + + //make mutated copies of all contained entities + for(auto e : entity->GetContainedEntities()) + new_entity->AddContainedEntity(MutateEntity(interpreter, e, mutation_rate, mutation_weights, operation_type), entity->GetIdStringId()); + + return new_entity; +} + +EvaluableNodeReference EntityManipulation::FlattenEntity(Interpreter *interpreter, Entity *entity, bool include_rand_seeds, bool parallel_create) +{ + EvaluableNodeManager *enm = interpreter->evaluableNodeManager; + + ////////// + //build code to look like: + // (let (assoc new_entity (first (create_entities + // (lambda *entity code*) ) + // ) ) ) + // [if include_rand_seeds] + // 
(set_entity_rand_seed + // new_entity + // *rand seed string* ) + // + // [for each contained entity specified by the list representing the relative location to new_entity] + // [if parallel_create, will group these in ||(parallel ...) by container entity + // + // [if include_rand_seeds] + // (set_entity_rand_seed + // (first + // [always] + // (create_entities + // (append new_entity *relative id*) + // (lambda *entity code*) ) + // (append new_entity *relative id*) + // *rand seed string* ) + // [if include_rand_seeds] + // ) + // *rand seed string* ) + // ) + // ) + + bool cycle_free = true; + auto contained_entities = entity->GetAllDeeplyContainedEntitiesGrouped(); + + EvaluableNode *let_new_entity = enm->AllocNode(ENT_LET); + //preallocate the assoc, set_entity_rand_seed, create and set_entity_rand_seed for each contained entity, then the return new_entity + let_new_entity->ReserveOrderedChildNodes(3 + 2 * contained_entities.size()); + + EvaluableNode *let_assoc = enm->AllocNode(ENT_ASSOC); + let_new_entity->AppendOrderedChildNode(let_assoc); + EvaluableNode *create_root_entity = enm->AllocNode(ENT_CREATE_ENTITIES); + EvaluableNode *first_of_create = enm->AllocNode(ENT_FIRST); + first_of_create->AppendOrderedChildNode(create_root_entity); + let_assoc->SetMappedChildNode(ENBISI_new_entity, first_of_create); + + EvaluableNode *lambda_for_create_root = enm->AllocNode(ENT_LAMBDA); + create_root_entity->AppendOrderedChildNode(lambda_for_create_root); + + EvaluableNodeReference root_copy = entity->GetRoot(enm, EvaluableNodeManager::ENMM_LABEL_ESCAPE_INCREMENT); + lambda_for_create_root->AppendOrderedChildNode(root_copy); + if(root_copy.GetNeedCycleCheck()) + cycle_free = false; + + if(include_rand_seeds) + { + // (set_entity_rand_seed + // new_entity + // *rand seed string* ) + EvaluableNode *set_rand_seed_root = enm->AllocNode(ENT_SET_ENTITY_RAND_SEED); + set_rand_seed_root->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + set_rand_seed_root->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, entity->GetRandomState())); + + let_new_entity->AppendOrderedChildNode(set_rand_seed_root); + } + + //where to create new entities into + EvaluableNode *cur_entity_creation_list = let_new_entity; + if(parallel_create) + { + //insert another parallel for the first group of entities + EvaluableNode *parallel_create_node = enm->AllocNode(ENT_PARALLEL); + parallel_create_node->SetConcurrency(true); + + cur_entity_creation_list->AppendOrderedChildNode(parallel_create_node); + cur_entity_creation_list = parallel_create_node; + } + + for(auto &cur_entity : contained_entities) + { + //end of a group of entities + if(cur_entity == nullptr) + { + //if parallel create, then push new entity group + if(parallel_create) + { + //insert another parallel for the this group of entities + EvaluableNode *parallel_create_node = enm->AllocNode(ENT_PARALLEL); + parallel_create_node->SetConcurrency(true); + + let_new_entity->AppendOrderedChildNode(parallel_create_node); + cur_entity_creation_list = parallel_create_node; + } + + //was not an entity, so move on to next + continue; + } + + // (create_entities + // (append new_entity *relative id*) + // (lambda *entity code*) + // ) + EvaluableNode *create_entity = enm->AllocNode(ENT_CREATE_ENTITIES); + + EvaluableNode *src_id_list = GetTraversalIDPathListFromAToB(enm, entity, cur_entity); + EvaluableNode *src_append = enm->AllocNode(ENT_APPEND); + src_append->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + 
src_append->AppendOrderedChildNode(src_id_list); + create_entity->AppendOrderedChildNode(src_append); + + EvaluableNode *lambda_for_create = enm->AllocNode(ENT_LAMBDA); + create_entity->AppendOrderedChildNode(lambda_for_create); + + EvaluableNodeReference contained_root_copy = cur_entity->GetRoot(enm, EvaluableNodeManager::ENMM_LABEL_ESCAPE_INCREMENT); + lambda_for_create->AppendOrderedChildNode(contained_root_copy); + if(contained_root_copy.GetNeedCycleCheck()) + cycle_free = false; + + if(include_rand_seeds) + { + // (set_entity_rand_seed + // (first ...create_entity... ) + // *rand seed string* ) + EvaluableNode *set_rand_seed = enm->AllocNode(ENT_SET_ENTITY_RAND_SEED); + EvaluableNode *first = enm->AllocNode(ENT_FIRST); + set_rand_seed->AppendOrderedChildNode(first); + first->AppendOrderedChildNode(create_entity); + set_rand_seed->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, cur_entity->GetRandomState())); + + //replace the old create_entity with the one surrounded by setting rand seed + create_entity = set_rand_seed; + } + + cur_entity_creation_list->AppendOrderedChildNode(create_entity); + } + + //add new_entity to return value of let statement to return the newly created id + let_new_entity->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI_new_entity)); + + //if anything isn't cycle free, then need to recompute everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(let_new_entity); + + return EvaluableNodeReference(let_new_entity, true); +} + +void EntityManipulation::RecursivelyRenameAllEntityReferences(Entity *entity, CompactHashMap &entities_renamed) +{ + EvaluableNodeTreeManipulation::ReplaceStringsInTree(entity->GetRoot(), entities_renamed); + + for(auto e : entity->GetContainedEntities()) + RecursivelyRenameAllEntityReferences(e, entities_renamed); +} diff --git a/src/Amalgam/entity/EntityManipulation.h b/src/Amalgam/entity/EntityManipulation.h new file mode 100644 index 00000000..f7bcdc50 --- /dev/null +++ b/src/Amalgam/entity/EntityManipulation.h @@ -0,0 +1,175 @@ +#pragma once + +//project headers: +#include "Entity.h" +#include "EvaluableNodeTreeManipulation.h" +#include "Merger.h" + +//Contains various classes and functions to manipulate entities +class EntityManipulation +{ +public: + //functionality to merge two Entities + class EntitiesMergeMethod : public Merger + { + public: + constexpr EntitiesMergeMethod(Interpreter *_interpreter, bool keep_all_of_both) + : interpreter(_interpreter), keepAllOfBoth(keep_all_of_both) + { } + + virtual MergeMetricResults MergeMetric(Entity *a, Entity *b) + { + return NumberOfSharedNodes(a, b); + } + + virtual Entity *MergeValues(Entity *a, Entity *b, bool must_merge = false); + + virtual bool KeepAllNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepSomeNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableValue() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableAInsteadOfB() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableA() + { return keepAllOfBoth; } + virtual bool KeepNonMergeableB() + { return keepAllOfBoth; } + + virtual bool AreMergeable(Entity *a, Entity *b) + { return keepAllOfBoth; } + + Interpreter *interpreter; + + protected: + bool keepAllOfBoth; + }; + + //functionality to difference two Entities + // merged entities will *not* contain any code, this is simply for mapping which entities should be merged + class EntitiesMergeForDifferenceMethod : public EntitiesMergeMethod + { + public: + inline 
EntitiesMergeForDifferenceMethod(Interpreter *_interpreter) + : EntitiesMergeMethod(_interpreter, false) + { } + + virtual Entity *MergeValues(Entity *a, Entity *b, bool must_merge = false); + + constexpr CompactHashMap &GetAEntitiesIncludedFromB() + { return aEntitiesIncludedFromB; } + constexpr CompactHashMap> &GetMergedEntitiesIncludedFromB() + { return mergedEntitiesIncludedFromB; } + + protected: + //key is the entity contained (perhaps deeply) by b + CompactHashMap aEntitiesIncludedFromB; + //key is the entity contained (perhaps deeply) by b + // value is a pair, the first being the entity from the merged entity group and the second being a bool as to whether or not the code is identical + CompactHashMap> mergedEntitiesIncludedFromB; + }; + + //functionality to mix Entities + class EntitiesMixMethod : public EntitiesMergeMethod + { + public: + EntitiesMixMethod(Interpreter *_interpreter, + double fraction_a, double fraction_b, double similar_mix_chance, double fraction_entities_to_mix); + + virtual Entity *MergeValues(Entity *a, Entity *b, bool must_merge); + + virtual bool KeepAllNonMergeableValues() + { return false; } + + virtual bool KeepSomeNonMergeableValues() + { return true; } + + virtual bool KeepNonMergeableValue() + { + return interpreter->randomStream.Rand() < fractionAOrB; + } + + virtual bool KeepNonMergeableAInsteadOfB() + { + return interpreter->randomStream.Rand() < fractionAInsteadOfB; + } + + virtual bool KeepNonMergeableA() + { + return interpreter->randomStream.Rand() < fractionA; + } + virtual bool KeepNonMergeableB() + { + return interpreter->randomStream.Rand() < fractionB; + } + + virtual bool AreMergeable(Entity *a, Entity *b) + { + return interpreter->randomStream.Rand() < fractionEntitiesToMix; + } + + protected: + + double fractionA; + double fractionB; + double fractionAOrB; + double fractionAInsteadOfB; + double similarMixChance; + double fractionEntitiesToMix; + }; + + //Entity merging functions + static Entity *IntersectEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2); + + static Entity *UnionEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2); + + //returns code that will transform entity1 into entity2, allocated with enm + static EvaluableNodeReference DifferenceEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2); + + static Entity *MixEntities(Interpreter *interpreter, Entity *entity1, Entity *entity2, + double fractionA, double fractionB, double similar_mix_chance, double fraction_entities_to_mix); + + //Computes the total number of nodes in both trees that are equal + static MergeMetricResults NumberOfSharedNodes(Entity *entity1, Entity *entity2); + + //computes the edit distance between the two entities + static double EditDistance(Entity *entity1, Entity *entity2); + + static Entity *MutateEntity(Interpreter *interpreter, Entity *entity, double mutation_rate, CompactHashMap *mutation_weights, CompactHashMap *operation_type); + + //flattens entity using interpreter into code that can recreate it + // if include_rand_seeds is true, it will emit code that includes them; otherwise it won't + // if parallel_create is true, it will emit slightly more complex code that creates entities in parallel + static EvaluableNodeReference FlattenEntity(Interpreter *interpreter, Entity *entity, bool include_rand_seeds, bool parallel_create); + +protected: + + //creates an associative lookup of the entities contained by entity from the string id to the entity pointer + static inline 
Entity::EntityLookupAssocType CreateContainedEntityLookupByStringId(Entity *entity) + { + Entity::EntityLookupAssocType contained_entities_lookup; + if(entity != nullptr) + { + auto &contained_entities = entity->GetContainedEntities(); + contained_entities_lookup.reserve(contained_entities.size()); + for(auto ce : entity->GetContainedEntities()) + contained_entities_lookup.insert(std::make_pair(ce->GetIdStringId(), ce)); + } + return contained_entities_lookup; + } + + //adds to merged_entity's contained entities to consist of Entities that are common across all of the Entities specified + //merged_entity should already have its code merged, as MergeContainedEntities may edit the strings in merged_entity to update + // new names of merged contained entities + static void MergeContainedEntities(EntitiesMergeMethod *mm, Entity *entity1, Entity *entity2, Entity *merged_entity); + + //traverses entity and all contained entities and for each of the entities, finds any string that matches a key of + // entities_renamed and replaces it with the value + //assumes that entity is not nullptr + static void RecursivelyRenameAllEntityReferences(Entity *entity, CompactHashMap &entities_renamed); +}; diff --git a/src/Amalgam/entity/EntityQueries.cpp b/src/Amalgam/entity/EntityQueries.cpp new file mode 100644 index 00000000..3f1d44c7 --- /dev/null +++ b/src/Amalgam/entity/EntityQueries.cpp @@ -0,0 +1,1278 @@ +//project headers: +#include "EntityQueries.h" +#include "Concurrency.h" +#include "EntityQueryManager.h" +#include "EntityQueryCaches.h" +#include "EvaluableNodeTreeFunctions.h" + +bool _enable_SBF_datastore = true; + +#ifdef MULTITHREAD_SUPPORT +Concurrency::ReadWriteMutex EntityQueryManager::queryCacheMutex; +#endif + +FastHashMap> EntityQueryManager::queryCaches; + +size_t EntityQueryManager::maxEntitiesBruteForceSearch = 10; + +bool EntityQueryCondition::DoesEntityMatchCondition(Entity *e) +{ + if(e == nullptr) + return false; + + switch(queryType) + { + case ENT_NULL: + return false; + + case ENT_QUERY_SELECT: + case ENT_QUERY_SAMPLE: + case ENT_QUERY_WEIGHTED_SAMPLE: + case ENT_QUERY_COUNT: + //it does not fail the condition here - needs to be checked elsewhere + return true; + + case ENT_QUERY_IN_ENTITY_LIST: + return std::find(begin(existLabels), end(existLabels), e->GetIdStringId()) != end(existLabels); + + case ENT_QUERY_NOT_IN_ENTITY_LIST: + return std::find(begin(existLabels), end(existLabels), e->GetIdStringId()) == end(existLabels); + + case ENT_QUERY_EXISTS: + for(auto &label : existLabels) + { + if(!e->DoesLabelExist(label)) + return false; + } + return true; + + case ENT_QUERY_NOT_EXISTS: + for(auto &label : existLabels) + { + if(e->DoesLabelExist(label)) + return false; + } + return true; + + case ENT_QUERY_EQUALS: + for(size_t i = 0; i < singleLabels.size(); i++) + { + auto &[label_id, compare_value] = singleLabels[i]; + auto compare_type = valueTypes[i]; + + EvaluableNodeImmediateValue value; + auto value_type = e->GetValueAtLabelAsImmediateValue(label_id, value); + + //needs to exist + if(value_type == ENIVT_NOT_EXIST) + return false; + + if(!EvaluableNodeImmediateValue::AreEqual(compare_type, compare_value, value_type, value)) + return false; + } + return true; + + case ENT_QUERY_NOT_EQUALS: + for(size_t i = 0; i < singleLabels.size(); i++) + { + auto &[label_id, compare_value] = singleLabels[i]; + auto compare_type = valueTypes[i]; + + EvaluableNodeImmediateValue value; + auto value_type = e->GetValueAtLabelAsImmediateValue(label_id, value); + + //needs to exist + if(value_type 
== ENIVT_NOT_EXIST) + return false; + + if(EvaluableNodeImmediateValue::AreEqual(compare_type, compare_value, value_type, value)) + return false; + } + return true; + + case ENT_QUERY_BETWEEN: + for(size_t i = 0; i < pairedLabels.size(); i++) + { + auto &[label_id, range] = pairedLabels[i]; + + if(valueTypes[i] == ENIVT_NUMBER) + { + double value; + if(!e->GetValueAtLabelAsNumber(label_id, value)) + return false; + + if(value < range.first.number || range.second.number < value) + return false; + } + else if(valueTypes[i] == ENIVT_STRING_ID) + { + StringInternPool::StringID value; + if(!e->GetValueAtLabelAsStringId(label_id, value)) + return false; + + if(StringNaturalCompare(value, range.first.stringID) <= 0 || StringNaturalCompare(range.second.stringID, value) <= 0) + return false; + } + } + return true; + + case ENT_QUERY_NOT_BETWEEN: + for(size_t i = 0; i < pairedLabels.size(); i++) + { + auto &[label_id, range] = pairedLabels[i]; + + if(valueTypes[i] == ENIVT_NUMBER) + { + double value; + if(!e->GetValueAtLabelAsNumber(label_id, value)) + return false; + + if(value >= range.first.number && range.second.number >= value) + return false; + } + else if(valueTypes[i] == ENIVT_STRING_ID) + { + StringInternPool::StringID value; + if(!e->GetValueAtLabelAsStringId(label_id, value)) + return false; + + if(StringNaturalCompare(value, range.first.stringID) > 0 && StringNaturalCompare(range.second.stringID, value) > 0) + return false; + } + } + return true; + + case ENT_QUERY_AMONG: + { + EvaluableNodeImmediateValue value; + auto value_type = e->GetValueAtLabelAsImmediateValue(singleLabel, value); + + if(value_type == ENIVT_NOT_EXIST) + return false; + + for(size_t i = 0; i < valueToCompare.size(); i++) + { + //make sure same type + if(value_type != valueTypes[i]) + return false; + + if(EvaluableNodeImmediateValue::AreEqual(value_type, value, valueTypes[i], valueToCompare[i])) + return true; + } + + return false; + } + + case ENT_QUERY_NOT_AMONG: + { + EvaluableNodeImmediateValue value; + auto value_type = e->GetValueAtLabelAsImmediateValue(singleLabel, value); + + if(value_type == ENIVT_NOT_EXIST) + return false; + + for(size_t i = 0; i < valueToCompare.size(); i++) + { + //make sure same type + if(value_type != valueTypes[i]) + return false; + + if(EvaluableNodeImmediateValue::AreEqual(value_type, value, valueTypes[i], valueToCompare[i])) + return false; + } + + return true; + } + + case ENT_QUERY_MAX: + case ENT_QUERY_MIN: + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + case ENT_QUERY_VALUE_MASSES: + //it does not fail the condition here - needs to be checked elsewhere + return true; + + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + { + std::vector position(positionLabels.size()); + std::vector position_types(positionLabels.size()); + for(size_t i = 0; i < positionLabels.size(); i++) + { + position_types[i] = e->GetValueAtLabelAsImmediateValue(positionLabels[i], position[i]); + if(position_types[i] == ENIVT_NOT_EXIST) + return false; + } + + double radius = 0.0; + if(singleLabel != StringInternPool::NOT_A_STRING_ID) + { + double value; + if(e->GetValueAtLabelAsNumber(singleLabel, value)) + radius = value; + } + + double distance = distParams.ComputeMinkowskiDistance(position, position_types, valueToCompare, valueTypes); + if(distance - radius > maxDistance) + return false; + + return true; + } + + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + //it does not fail the 
condition here - needs to be checked elsewhere + return true; + + case ENT_COMPUTE_ENTITY_CONVICTIONS: + case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: + return false; + + default: + // eliminates compiler warnings on clang. + break; + } + + return false; +} + +double EntityQueryCondition::GetConditionDistanceMeasure(Entity *e) +{ + if(e == nullptr) + return std::numeric_limits::quiet_NaN(); + + //make sure not excluding this entity + if(e->GetIdStringId() == exclusionLabel) + return std::numeric_limits::quiet_NaN(); + + std::vector position(positionLabels.size()); + std::vector position_types(positionLabels.size()); + for(size_t i = 0; i < positionLabels.size(); i++) + { + position_types[i] = e->GetValueAtLabelAsImmediateValue(positionLabels[i], position[i]); + if(position_types[i] == ENIVT_NOT_EXIST) + return std::numeric_limits::quiet_NaN(); + } + + double radius = 0.0; + if(singleLabel != StringInternPool::NOT_A_STRING_ID) + { + double value; + if(e->GetValueAtLabelAsNumber(singleLabel, value)) + radius = value; + } + + double distance = distParams.ComputeMinkowskiDistance(position, position_types, valueToCompare, valueTypes); + return distance - radius; +} + +EvaluableNodeReference EntityQueryCondition::GetMatchingEntities(Entity *container, + std::vector &matching_entities, bool from_all_entities, EvaluableNodeManager *enm) +{ + if(from_all_entities) + { + //if the specific entities are specified, then just use those + if(queryType == ENT_QUERY_IN_ENTITY_LIST) + { + //only need to select those from within the list + matching_entities.reserve(existLabels.size()); + for(auto &entity_sid : existLabels) + { + auto entity = container->GetContainedEntity(entity_sid); + if(entity != nullptr) + matching_entities.push_back(entity); + } + return EvaluableNodeReference::Null(); + } + + //else, start with all entities + matching_entities.reserve(container->GetContainedEntities().size()); + for(auto entity : container->GetContainedEntities()) + matching_entities.push_back(entity); + } + + switch(queryType) + { + case ENT_QUERY_SELECT: + { + //regardless of options, need to sort entities by entity id + EntityQueryManager::SortEntitiesByID(matching_entities); + + size_t start_offset = std::min(matching_entities.size(), startOffset); + size_t num_to_select = std::min(matching_entities.size() - start_offset, static_cast(maxToRetrieve)); + + if(num_to_select == 0) + { + matching_entities.clear(); + return EvaluableNodeReference::Null(); + } + + if(hasRandomStream) + { + size_t num_entities = matching_entities.size(); + if(hasStartOffset) + { + //shuffle all because we don't know what the starting offset will be and some values may be swapped with others + for(size_t i = 0; i < num_entities; i++) + { + size_t index_to_swap = randomStream.RandSize(num_entities); + std::swap(matching_entities[i], matching_entities[index_to_swap]); + } + } + else //no start offset, only need to shuffle the number to be returned; don't worry about the rest because this sequence won't be resumed + { + for(size_t i = 0; i < num_to_select; i++) + { + size_t index_to_swap = randomStream.RandSize(num_entities); + std::swap(matching_entities[i], matching_entities[index_to_swap]); + } + } + } + + //remove any off the front based on start offset + if(hasStartOffset) + matching_entities.erase(begin(matching_entities), begin(matching_entities) + start_offset); + + //cut off everything but the number requested + 
matching_entities.resize(num_to_select); + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_SAMPLE: + { + size_t num_entities = matching_entities.size(); + size_t num_to_sample = static_cast(maxToRetrieve); + + if(num_entities == 0 || num_to_sample == 0) + { + matching_entities.clear(); + return EvaluableNodeReference::Null(); + } + + std::vector samples; + samples.reserve(num_to_sample); + + //obtain random stream either from the condition or use a default one + RandomStream random_stream; + if(hasRandomStream) + random_stream = randomStream.CreateOtherStreamViaRand(); + else //just use a random seed + random_stream.SetState("12345"); + + //select num_to_select entities and save them in the sample vector + for(size_t i = 0; i < num_to_sample; i++) + { + size_t index_to_swap = randomStream.RandSize(num_entities); + Entity *selected = matching_entities[index_to_swap]; + samples.emplace_back(selected); + } + + //swap samples vector with the matching_entities + std::swap(matching_entities, samples); + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_WEIGHTED_SAMPLE: + { + size_t num_entities = matching_entities.size(); + size_t num_to_sample = static_cast(maxToRetrieve); + auto weight_label_id = singleLabel; + + if(num_entities == 0 || num_to_sample == 0) + { + matching_entities.clear(); + return EvaluableNodeReference::Null(); + } + + //retrieve weights + std::vector entity_weights; + entity_weights.reserve(num_to_sample); + + //retrieve and accumulate weights + for(size_t i = 0; i < matching_entities.size(); i++) + { + double value; + Entity *e = matching_entities[i]; + if(e != nullptr && e->GetValueAtLabelAsNumber(weight_label_id, value)) + { + if(FastIsNaN(value)) + value = 0.0; + + entity_weights.push_back(value); + } + else + { + entity_weights.push_back(0.0); + } + } + + //obtain random stream either from the condition or use a default one + RandomStream random_stream; + if(hasRandomStream) + random_stream = randomStream.CreateOtherStreamViaRand(); + else //just use a random seed + random_stream.SetState("12345"); + + std::vector samples; + samples.reserve(num_to_sample); + + //if just one sample, brute-force it + if(num_to_sample == 1) + { + size_t selected_index = WeightedDiscreteRandomSample(entity_weights, random_stream, true); + Entity *selected = matching_entities[selected_index]; + samples.emplace_back(selected); + } + else //build temporary cache and query + { + WeightedDiscreteRandomStreamTransform wdrst(matching_entities, entity_weights, true); + for(size_t i = 0; i < num_to_sample; i++) + samples.emplace_back(wdrst.WeightedDiscreteRand(random_stream)); + } + + //swap samples vector with the matching_entities + std::swap(matching_entities, samples); + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_COUNT: + { + //not useful unless computing + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + return EvaluableNodeReference(enm->AllocNode(static_cast(matching_entities.size())), true); + } + + case ENT_QUERY_EXISTS: + { + //find those that match + for(size_t i = 0; i < matching_entities.size(); i++) + { + //if it doesn't match the condition, then remove it + if(!DoesEntityMatchCondition(matching_entities[i])) + { + matching_entities.erase(begin(matching_entities) + i); + i--; + } + } + + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + //get values for each entity + EvaluableNode *query_return = enm->AllocNode(ENT_ASSOC); + query_return->ReserveMappedChildNodes(matching_entities.size()); + for(size_t i = 0; i < 
matching_entities.size(); i++) + { + if(matching_entities[i] == nullptr) + continue; + + //create assoc for values for each entity + StringInternPool::StringID entity_sid = matching_entities[i]->GetIdStringId(); + EvaluableNode *entity_values = enm->AllocNode(ENT_ASSOC); + entity_values->ReserveMappedChildNodes(existLabels.size()); + query_return->SetMappedChildNode(entity_sid, entity_values); + + //get values + auto &exist_labels = existLabels; + string_intern_pool.CreateStringReferences(exist_labels); + for(auto label_sid : exist_labels) + entity_values->SetMappedChildNodeWithReferenceHandoff(label_sid, matching_entities[i]->GetValueAtLabel(label_sid, enm, false)); + } + + return EvaluableNodeReference(query_return, true); + } + + case ENT_QUERY_MAX: + case ENT_QUERY_MIN: + { + //get values for each entity + std::vector> entity_values; + entity_values.reserve(matching_entities.size()); + for(size_t i = 0; i < matching_entities.size(); i++) + { + if(matching_entities[i] == nullptr) + continue; + + EvaluableNodeImmediateValue value; + auto value_type = matching_entities[i]->GetValueAtLabelAsImmediateValue(singleLabel, value); + + if(value_type == singleLabelType) + entity_values.push_back(std::make_pair(matching_entities[i], value)); + } + + //sort entites by value + if(queryType == ENT_QUERY_MIN) + { + if(singleLabelType == ENIVT_NUMBER) + { + std::sort(begin(entity_values), end(entity_values), + [](std::pair a, std::pair b) -> bool + { return a.second.number < b.second.number; }); + } + else if(singleLabelType == ENIVT_STRING_ID) + { + std::sort(begin(entity_values), end(entity_values), + [](std::pair a, std::pair b) -> bool + { return StringIDNaturalCompareSort(a.second.stringID, b.second.stringID); }); + } + } + else //ENT_QUERY_MAX + { + if(singleLabelType == ENIVT_NUMBER) + { + std::sort(begin(entity_values), end(entity_values), + [](std::pair a, std::pair b) -> bool + { return a.second.number > b.second.number; }); + } + else if(singleLabelType == ENIVT_STRING_ID) + { + std::sort(begin(entity_values), end(entity_values), + [](std::pair a, std::pair b) -> bool + { return StringIDNaturalCompareSortReverse(a.second.stringID, b.second.stringID); }); + } + } + + //delete elements beyond the number to keep + size_t num_to_keep = std::min(static_cast(maxToRetrieve), entity_values.size()); + entity_values.erase(begin(entity_values) + num_to_keep, end(entity_values)); + + //only copy over entities to keep + matching_entities.resize(entity_values.size()); + for(size_t i = 0; i < entity_values.size(); i++) + matching_entities[i] = entity_values[i].first; + + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + { + //not useful unless computing + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + auto get_value = [matching_entities, this] + (size_t i, double &value) + { + return matching_entities[i]->GetValueAtLabelAsNumber(singleLabel, value); + }; + + auto get_weight = [matching_entities, this] + (size_t i, double &weight_value) + { + return matching_entities[i]->GetValueAtLabelAsNumber(weightLabel, weight_value); + }; + + switch(queryType) + { + case ENT_QUERY_SUM: + { + double sum = EntityQueriesStatistics::Sum(0, matching_entities.size(), get_value, + weightLabel != StringInternPool::NOT_A_STRING_ID, get_weight); + return EvaluableNodeReference(enm->AllocNode(sum), true); + } + case ENT_QUERY_MODE: + { + 
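+ //mode returns the value with the greatest total weight; numeric and string id labels use separate helpers below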
if(singleLabelType == ENIVT_NUMBER) + { + double mode = EntityQueriesStatistics::ModeNumber(0, matching_entities.size(), get_value, + weightLabel != StringInternPool::NOT_A_STRING_ID, get_weight); + return EvaluableNodeReference(enm->AllocNode(mode), true); + } + else if(singleLabelType == ENIVT_STRING_ID) + { + auto get_string_value = [matching_entities, this] + (size_t i, StringInternPool::StringID &value) + { + return matching_entities[i]->GetValueAtLabelAsStringId(singleLabel, value); + }; + + auto [found, mode_id] = EntityQueriesStatistics::ModeStringId( + 0, matching_entities.size(), get_string_value, true, get_weight); + + if(found) + return EvaluableNodeReference(enm->AllocNode(ENT_STRING, mode_id), true); + else + return EvaluableNodeReference::Null(); + } + break; + } + case ENT_QUERY_QUANTILE: + { + std::vector> values_buffer; + double quantile = EntityQueriesStatistics::Quantile(0, matching_entities.size(), get_value, + weightLabel != StringInternPool::NOT_A_STRING_ID, get_weight, qPercentage, values_buffer); + return EvaluableNodeReference(enm->AllocNode(quantile), true); + } + case ENT_QUERY_GENERALIZED_MEAN: + { + double generalized_mean = EntityQueriesStatistics::GeneralizedMean(0, matching_entities.size(), get_value, + weightLabel != StringInternPool::NOT_A_STRING_ID, get_weight, distParams.pValue, center, calculateMoment, absoluteValue); + return EvaluableNodeReference(enm->AllocNode(generalized_mean), true); + } + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + { + std::vector values_buffer; + double extreme_value = EntityQueriesStatistics::ExtremeDifference(0, matching_entities.size(), get_value, + queryType == ENT_QUERY_MIN_DIFFERENCE, maxDistance, includeZeroDifferences, values_buffer); + return EvaluableNodeReference(enm->AllocNode(extreme_value), true); + } + default: + break; + } + + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_VALUE_MASSES: + { + //not useful unless computing + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + if(singleLabelType == ENIVT_NUMBER) + { + auto get_value = [matching_entities, this] + (size_t i, double &value) + { + return matching_entities[i]->GetValueAtLabelAsNumber(singleLabel, value); + }; + + auto get_weight = [matching_entities, this] + (size_t i, double &weight_value) + { + return matching_entities[i]->GetValueAtLabelAsNumber(weightLabel, weight_value); + }; + + auto value_weights = EntityQueriesStatistics::ValueMassesNumber(0, matching_entities.size(), matching_entities.size(), + get_value, weightLabel != StringInternPool::NOT_A_STRING_ID, get_weight); + + EvaluableNode *assoc = enm->AllocNode(ENT_ASSOC); + assoc->ReserveMappedChildNodes(value_weights.size()); + + std::string string_value; + for(auto &[value, weight] : value_weights) + { + string_value = EvaluableNode::NumberToString(value); + assoc->SetMappedChildNode(string_value, enm->AllocNode(weight)); + } + + return EvaluableNodeReference(assoc, true); + } + else if(singleLabelType == ENIVT_STRING_ID) + { + auto get_value = [matching_entities, this] + (size_t i, StringInternPool::StringID &value) + { + return matching_entities[i]->GetValueAtLabelAsStringId(singleLabel, value); + }; + + auto get_weight = [matching_entities, this] + (size_t i, double &weight_value) + { + return matching_entities[i]->GetValueAtLabelAsNumber(weightLabel, weight_value); + }; + + auto value_weights = EntityQueriesStatistics::ValueMassesStringId(0, matching_entities.size(), matching_entities.size(), get_value, + weightLabel != 
StringInternPool::NOT_A_STRING_ID, get_weight); + + EvaluableNode *assoc = enm->AllocNode(ENT_ASSOC); + assoc->ReserveMappedChildNodes(value_weights.size()); + + for(auto &[value, weight] : value_weights) + assoc->SetMappedChildNode(value, enm->AllocNode(weight)); + + return EvaluableNodeReference(assoc, true); + } + + return EvaluableNodeReference::Null(); + } + + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + { + size_t num_to_keep = std::min(static_cast(maxToRetrieve), matching_entities.size()); + + //get values for each entity + StochasticTieBreakingPriorityQueue> nearest_entities(randomStream.CreateOtherStreamViaRand()); + for(size_t i = 0; i < matching_entities.size(); i++) + { + double value = GetConditionDistanceMeasure(matching_entities[i]); + if(FastIsNaN(value)) + continue; + + nearest_entities.Push(DistanceReferencePair(value, matching_entities[i])); + + if(nearest_entities.Size() > num_to_keep) + nearest_entities.Pop(); + } + + //retrieve the top k cases into entity_values + std::vector> entity_values; + entity_values.reserve(num_to_keep); + for(size_t i = 0; i < num_to_keep && nearest_entities.Size() > 0; i++) + { + auto &dist_ent = nearest_entities.Top(); + entity_values.push_back(DistanceReferencePair(dist_ent.distance, dist_ent.reference)); + + nearest_entities.Pop(); + } + + //reduce matching_entities to only those needed + matching_entities.resize(entity_values.size()); + for(size_t i = 0; i < entity_values.size(); i++) + matching_entities[i] = entity_values[i].reference; + + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + if(distParams.recomputeAccurateDistances) + { + //store state for reversion and overwrite with compute accurate distances + bool old_recalculate_distances_accurately_state = distParams.highAccuracy; + distParams.SetHighAccuracy(true); + + //recompute distance accurately for each found entity result + for(auto &it : entity_values) + it.distance = GetConditionDistanceMeasure(it.reference); + + //revert to original state + distParams.SetHighAccuracy(old_recalculate_distances_accurately_state); + } + + //transform distances as appropriate + EntityQueriesStatistics::DistanceTransform distance_transform(transformSuprisalToProb, + distanceWeightExponent, weightLabel != StringInternPool::NOT_A_STRING_ID, + [this](Entity *e, double &weight_value) { return e->GetValueAtLabelAsNumber(weightLabel, weight_value); }); + + distance_transform.TransformDistances(entity_values, returnSortedList); + + return EntityQueryManager::ConvertResultsToEvaluableNodes(entity_values, + enm, returnSortedList, additionalSortedListLabel, [](auto entity) { return entity; }); + } + + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + { + //find those that match + for(size_t i = 0; i < matching_entities.size(); i++) + { + //if it doesn't match the condition, then remove it + if(!DoesEntityMatchCondition(matching_entities[i])) + { + matching_entities.erase(begin(matching_entities) + i); + i--; + } + } + + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + //compute distances + //Note that this recalculates the distance. 
Since this is a small number of cases, it shouldn't be a big performance impact -- for larger queries, it will use faster methods + // if this becomes a performance issue, then DoesEntityMatchCondition can be refactored to optionally return the values it computed + std::vector> entity_values; + entity_values.reserve(matching_entities.size()); + for(size_t i = 0; i < matching_entities.size(); i++) + entity_values.push_back(DistanceReferencePair(GetConditionDistanceMeasure(matching_entities[i]), matching_entities[i])); + + //transform distances as appropriate + EntityQueriesStatistics::DistanceTransform distance_transform(transformSuprisalToProb, + distanceWeightExponent, weightLabel != StringInternPool::NOT_A_STRING_ID, + [this](Entity *e, double &weight_value) { return e->GetValueAtLabelAsNumber(weightLabel, weight_value); }); + + distance_transform.TransformDistances(entity_values, returnSortedList); + + return EntityQueryManager::ConvertResultsToEvaluableNodes(entity_values, + enm, returnSortedList, additionalSortedListLabel, [](auto entity) { return entity; }); + } + + default: + for(size_t i = 0; i < matching_entities.size(); i++) + { + //if it doesn't match the condition, then remove it + if(!DoesEntityMatchCondition(matching_entities[i])) + { + matching_entities.erase(begin(matching_entities) + i); + i--; + } + } + + return EvaluableNodeReference::Null(); + } +} + +//returns true if the chain of query conditions can be used in the query caches path (faster queries) +static bool CanUseQueryCaches(std::vector &conditions) +{ + for(size_t i = 0; i < conditions.size(); i++) + { + if(!EntityQueryCaches::DoesCachedConditionMatch(&conditions[i], i + 1 == conditions.size())) + return false; + } + + return true; +} + +EntityQueryCaches *EntityQueryManager::GetQueryCachesForContainer(Entity *container) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); +#endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + return &(*found_cache->second); + +#ifdef MULTITHREAD_SUPPORT + //not found, so need to insert a new one + lock.unlock(); + + Concurrency::WriteLock write_lock(queryCacheMutex); + + //need to double-check to make sure no other thread inserted the cache between the locks + found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + return &(*found_cache->second); +#endif + + //doesn't exist, so insert it and return; it is a double lookup compared to above, + //but it is rare, and it needs a write lock + queryCaches.emplace(container, std::make_unique(container)); + return &(*queryCaches[container]); +} + +EvaluableNodeReference EntityQueryManager::GetMatchingEntitiesFromQueryCaches(Entity *container, + std::vector &conditions, EvaluableNodeManager *enm, bool return_query_value) +{ + //get the label existance cache associated with this container + // use the first condition as an heuristic for building it if it doesn't exist + EntityQueryCaches *entity_caches = GetQueryCachesForContainer(container); + + //starting collection of matching entities, initialized to all entities with the requested labels + // reuse existing buffer + BitArrayIntegerSet &matching_ents = entity_caches->buffers.currentMatchingEntities; + matching_ents.clear(); + + //this will be cleared each iteration + auto &compute_results = entity_caches->buffers.computeResultsIdToValue; + + auto &indices_with_duplicates = entity_caches->buffers.entityIndicesWithDuplicates; + indices_with_duplicates.clear(); + + //execute each query + // 
for the first condition, matching_ents is empty and must be populated + // for each subsequent loop, matching_ents will have the currently selected entities to query from + for(size_t cond_index = 0; cond_index < conditions.size(); cond_index++) + { + auto &cond = conditions[cond_index]; + bool is_first = (cond_index == 0); + bool is_last = (cond_index == (conditions.size() - 1)); + + //start each condition with cleared compute results as to not reuse the results from a previous computation + compute_results.clear(); + + //if query_none, return results as empty list + if(cond.queryType == ENT_NULL) + return EvaluableNodeReference(enm->AllocNode(ENT_LIST), true); + + switch(cond.queryType) + { + case ENT_QUERY_COUNT: + if(is_first) + return EvaluableNodeReference(enm->AllocNode(static_cast(container->GetNumContainedEntities())), true); + else + return EvaluableNodeReference(enm->AllocNode(static_cast(matching_ents.size())), true); + + case ENT_QUERY_IN_ENTITY_LIST: + { + if(is_first) + { + for(const auto &id : cond.existLabels) + { + size_t entity_index = container->GetContainedEntityIndex(id); + if(entity_index != std::numeric_limits::max()) + matching_ents.insert(entity_index); + } + } + else + { + BitArrayIntegerSet &temp = entity_caches->buffers.tempMatchingEntityIndices; + temp.clear(); + + for(const auto &id : cond.existLabels) + { + size_t entity_index = container->GetContainedEntityIndex(id); + if(matching_ents.contains(entity_index)) + temp.insert(entity_index); + } + + matching_ents.Intersect(temp); + } + + break; + } + + case ENT_QUERY_NOT_IN_ENTITY_LIST: + { + //if first, need to start with all entities + if(is_first) + matching_ents.SetAllIds(container->GetNumContainedEntities()); + + for(const auto &id : cond.existLabels) + { + size_t entity_index = container->GetContainedEntityIndex(id); + matching_ents.erase(entity_index); //note, does nothing if id is already not in matching_ents + } + + break; + } + + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + { + //if excluding an entity, translate it into the index + if(cond.exclusionLabel == string_intern_pool.NOT_A_STRING_ID) + cond.exclusionLabel = std::numeric_limits::max(); + else + cond.exclusionLabel = container->GetContainedEntityIndex(cond.exclusionLabel); + //fall through to cases below + } + + case ENT_QUERY_EXISTS: + case ENT_QUERY_NOT_EXISTS: + case ENT_QUERY_EQUALS: + case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_BETWEEN: + case ENT_QUERY_NOT_BETWEEN: + case ENT_QUERY_AMONG: + case ENT_QUERY_NOT_AMONG: + case ENT_QUERY_MAX: + case ENT_QUERY_MIN: + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: + case ENT_COMPUTE_ENTITY_CONVICTIONS: + case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + { + entity_caches->GetMatchingEntities(&cond, matching_ents, compute_results, is_first, !is_last || !return_query_value); + break; + } + + case ENT_QUERY_SUM: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + { + entity_caches->GetMatchingEntities(&cond, matching_ents, compute_results, is_first, !is_last || !return_query_value); + + if(compute_results.size() > 0) + return EvaluableNodeReference(enm->AllocNode(static_cast(compute_results[0].distance)), true); + else + return EvaluableNodeReference(enm->AllocNode(std::numeric_limits::quiet_NaN()), true); + } + + case ENT_QUERY_MODE: + { + if(cond.singleLabelType == ENIVT_NUMBER) + { + entity_caches->GetMatchingEntities(&cond, 
matching_ents, compute_results, is_first, !is_last || !return_query_value); + + if(compute_results.size() > 0) + return EvaluableNodeReference(enm->AllocNode(static_cast(compute_results[0].distance)), true); + else + return EvaluableNodeReference(enm->AllocNode(std::numeric_limits::quiet_NaN()), true); + } + else if(cond.singleLabelType == ENIVT_STRING_ID) + { + StringInternPool::StringID mode = string_intern_pool.NOT_A_STRING_ID; + + if(entity_caches->ComputeValueFromMatchingEntities(&cond, matching_ents, mode, is_first)) + return EvaluableNodeReference(enm->AllocNode(ENT_STRING, mode), true); + else + return EvaluableNodeReference::Null(); + } + break; + } + + case ENT_QUERY_VALUE_MASSES: + { + if(cond.singleLabelType == ENIVT_NUMBER) + { + FastHashMap, DoubleNanHashComparator> value_weights; + entity_caches->ComputeValuesFromMatchingEntities(&cond, matching_ents, value_weights, is_first); + + EvaluableNode *assoc = enm->AllocNode(ENT_ASSOC); + assoc->ReserveMappedChildNodes(value_weights.size()); + + std::string string_value; + for(auto &[value, weight] : value_weights) + { + string_value = EvaluableNode::NumberToString(value); + assoc->SetMappedChildNode(string_value, enm->AllocNode(weight)); + } + + return EvaluableNodeReference(assoc, true); + } + else if(cond.singleLabelType == ENIVT_STRING_ID) + { + FastHashMap value_weights; + entity_caches->ComputeValuesFromMatchingEntities(&cond, matching_ents, value_weights, is_first); + + EvaluableNode *assoc = enm->AllocNode(ENT_ASSOC); + assoc->ReserveMappedChildNodes(value_weights.size()); + + for(auto &[value, weight] : value_weights) + assoc->SetMappedChildNode(value, enm->AllocNode(weight)); + + return EvaluableNodeReference(assoc, true); + } + + break; + } + + case ENT_QUERY_SAMPLE: + { + size_t num_entities; + if(is_first) + num_entities = container->GetNumContainedEntities(); + else + num_entities = matching_ents.size(); + + //if matching_ents is empty, there is nothing to select from + if(num_entities == 0) + break; + + size_t num_to_sample = static_cast(cond.maxToRetrieve); + + bool update_matching_ents = (!is_last || !return_query_value); + + BitArrayIntegerSet &temp = entity_caches->buffers.tempMatchingEntityIndices; + if(update_matching_ents) + temp.clear(); + + for(size_t i = 0; i < num_to_sample; i++) + { + //get a random id out of all valid ones + size_t selected_id; + if(is_first) + selected_id = cond.randomStream.RandSize(num_entities); + else + selected_id = matching_ents.GetNthElement(cond.randomStream.RandSize(num_entities)); + + //keep track if necessary + if(!update_matching_ents) + temp.insert(selected_id); + indices_with_duplicates.push_back(selected_id); + } + + if(!update_matching_ents) + matching_ents = temp; + + break; + } + + case ENT_QUERY_WEIGHTED_SAMPLE: + { + entity_caches->GetMatchingEntitiesViaSamplingWithReplacement(&cond, matching_ents, indices_with_duplicates, is_first, !is_last); + break; + } + + case ENT_QUERY_SELECT: + { + size_t num_to_select = static_cast(cond.maxToRetrieve); + size_t offset = cond.hasStartOffset ? 
static_cast(cond.startOffset) : 0; //offset to start selecting from, maintains the order given a random seed + + size_t num_entities; + if(is_first) + num_entities = container->GetNumContainedEntities(); + else + num_entities = matching_ents.size(); + + if(num_entities == 0) + break; + + if(is_first && !cond.hasRandomStream) + { + for(size_t i = offset; i < num_to_select + offset && i < num_entities; i++) + matching_ents.insert(i); + } + else + { + BitArrayIntegerSet &temp = entity_caches->buffers.tempMatchingEntityIndices; + temp.clear(); + + if(is_first) //we know hasRandomStream is true from above logic + temp.SetAllIds(num_entities); + else + { + temp = matching_ents; + matching_ents.clear(); + } + + if(cond.hasRandomStream) + { + for(size_t i = 0; i < num_to_select + offset; i++) + { + if(temp.size() == 0) + break; + + //find random + size_t selected_index = cond.randomStream.RandSize(temp.size()); + + selected_index = temp.GetNthElement(selected_index); + temp.erase(selected_index); + + //if before offset, need to burn through random numbers to get consistent results + if(i < offset) + continue; + + //add to results + matching_ents.insert(selected_index); + } + } + else //no random stream, just go in order + { + size_t max_index = std::min(num_to_select + offset, temp.size()); + for(size_t i = offset; i < max_index; i++) + { + size_t selected_index = temp.GetNthElement(i); + matching_ents.insert(selected_index); + } + } + } + + break; + } + + default: + break; + } + } + + //---Return Query Results---// + EntityQueryCondition *last_query = nullptr; + EvaluableNodeType last_query_type = ENT_NULL; + if(conditions.size() > 0) + { + last_query = &conditions.back(); + last_query_type = last_query->queryType; + } + + //function to transform entity indices to entity ids + const auto entity_index_to_id = [container](size_t entity_index) { return container->GetContainedEntityIdFromIndex(entity_index); }; + + //if last query condition is query sample, return each sampled entity id which may include duplicates + if(last_query_type == ENT_QUERY_SAMPLE || last_query_type == ENT_QUERY_WEIGHTED_SAMPLE) + return CreateListOfStringsIdsFromIteratorAndFunction(indices_with_duplicates, enm, entity_index_to_id); + + //return data as appropriate + if(return_query_value && last_query != nullptr) + { + auto &contained_entities = container->GetContainedEntities(); + + //if the query type uses compute results + if(last_query_type == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE + || last_query_type == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE + || last_query_type == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS + || last_query_type == ENT_COMPUTE_ENTITY_CONVICTIONS + || last_query_type == ENT_COMPUTE_ENTITY_KL_DIVERGENCES) + { + return EntityQueryManager::ConvertResultsToEvaluableNodes(compute_results, + enm, last_query->returnSortedList, last_query->additionalSortedListLabel, + [&contained_entities](auto entity_index) { return contained_entities[entity_index]; }); + } + else //if there are no compute results, return an assoc of the requested labels for each entity + { + //return assoc of distances if requested + EvaluableNode *query_return = enm->AllocNode(ENT_ASSOC); + query_return->ReserveMappedChildNodes(matching_ents.size()); + + //create a string reference for each entity + string_intern_pool.CreateStringReferences(matching_ents, + [&contained_entities](auto entity_index) { return contained_entities[entity_index]->GetIdStringId(); }); + + auto &exist_labels = last_query->existLabels; + + if(exist_labels.size() > 0) + { 
+ //create string reference for each entity's labels + string_intern_pool.CreateMultipleStringReferences(exist_labels, matching_ents.size()); + + for(const auto &entity_index : matching_ents) + { + //create assoc for values for each entity + EvaluableNode *entity_values = enm->AllocNode(ENT_ASSOC); + entity_values->ReserveMappedChildNodes(exist_labels.size()); + query_return->SetMappedChildNodeWithReferenceHandoff(contained_entities[entity_index]->GetIdStringId(), entity_values); + + //get values + for(auto &label_sid : exist_labels) + entity_values->SetMappedChildNodeWithReferenceHandoff(label_sid, contained_entities[entity_index]->GetValueAtLabel(label_sid, enm, false)); + } + } + else //no exist_labels + { + //create a null for every entry, since nothing requested + for(const auto &entity_index : matching_ents) + query_return->SetMappedChildNodeWithReferenceHandoff(contained_entities[entity_index]->GetIdStringId(), nullptr); + } + + return EvaluableNodeReference(query_return, true); + } + } + + return CreateListOfStringsIdsFromIteratorAndFunction(matching_ents, enm, entity_index_to_id); +} + + +EvaluableNodeReference EntityQueryManager::GetEntitiesMatchingQuery(Entity *container, std::vector &conditions, EvaluableNodeManager *enm, bool return_query_value) +{ + if(_enable_SBF_datastore && CanUseQueryCaches(conditions)) + return GetMatchingEntitiesFromQueryCaches(container, conditions, enm, return_query_value); + + if(container == nullptr) + return EvaluableNodeReference(enm->AllocNode(ENT_LIST), true); + + //list of the entities to be found, pruned down, and ultimately returned after converting to matching_entity_ids + std::vector matching_entities; + EvaluableNodeReference query_return_value; + + //start querying + for(size_t cond_index = 0; cond_index < conditions.size(); cond_index++) + { + bool first_condition = (cond_index == 0); + bool last_condition = (cond_index + 1 == conditions.size()); + + //reset to make sure it doesn't return an outdated list + query_return_value = EvaluableNodeReference::Null(); + + //check for any unsupported operations by brute force; if possible, use query caches, otherwise return null + if(conditions[cond_index].queryType == ENT_COMPUTE_ENTITY_CONVICTIONS || conditions[cond_index].queryType == ENT_COMPUTE_ENTITY_KL_DIVERGENCES + || conditions[cond_index].queryType == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE || conditions[cond_index].queryType == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS) + { + if(CanUseQueryCaches(conditions)) + return GetMatchingEntitiesFromQueryCaches(container, conditions, enm, return_query_value); + else + return EvaluableNodeReference::Null(); + } + + query_return_value = conditions[cond_index].GetMatchingEntities(container, matching_entities, first_condition, (return_query_value && last_condition) ? 
enm : nullptr); + } + + //if need to return something specific, then do so, otherwise return list of matching entities + if(query_return_value != nullptr) + return query_return_value; + + SortEntitiesByID(matching_entities); + return CreateListOfStringsIdsFromIteratorAndFunction(matching_entities, enm, [](Entity *e) { return e->GetIdStringId(); }); +} diff --git a/src/Amalgam/entity/EntityQueries.h b/src/Amalgam/entity/EntityQueries.h new file mode 100644 index 00000000..854859ab --- /dev/null +++ b/src/Amalgam/entity/EntityQueries.h @@ -0,0 +1,115 @@ +#pragma once + +//project headers: +#include "Entity.h" +#include "EvaluableNode.h" +#include "GeneralizedDistance.h" + +//system headers: +#include +#include +#include +#include +#include +#include + +//if set to false, will not allow use of the SBF datastore +extern bool _enable_SBF_datastore; + +class EntityQueryCondition +{ +public: + EntityQueryCondition() + : queryType(ENT_NULL) + { } + + //returns true if the entity matches the condition + bool DoesEntityMatchCondition(Entity *e); + + //computes the distance measure of the condition + // returns NaN if invalid + double GetConditionDistanceMeasure(Entity *e); + + EvaluableNodeReference GetMatchingEntities(Entity *container, std::vector &matching_entities, + bool from_all_entities, EvaluableNodeManager *enm); + + EvaluableNodeType queryType; + + //label vector used for existence queries + //**also aliased and used for the list of entity IDs to compute conviction for when type is ENT_COMPUTE_ENTITY_CONVICTIONS + std::vector existLabels; + + //vector used to describe the types of each label or value + std::vector valueTypes; + + //pairs of ids and values + std::vector> singleLabels; + + //pairs of ids and pairs of values + std::vector>> pairedLabels; + + std::vector positionLabels; //the labels that comprise each dimension of the position + std::vector valueToCompare;//sometimes used for position values in conjunction with positionLabels + + GeneralizedDistance distParams; + + //a single standalone label in the query + StringInternPool::StringID singleLabel; + + //when requesting a single type + EvaluableNodeImmediateValueType singleLabelType; + + //a label of an id to exclude + StringInternPool::StringID exclusionLabel; + + //a label representing a weight label + StringInternPool::StringID weightLabel; + + //maximum distance between valueToCompare and the entity + double maxDistance; + + //maximum number of entities to retrieve (based on queryType) + double maxToRetrieve; + + //distance weight exponent for distance queries (takes distance and raises it to the respective exponent) when returning distances + //only applicable when transformSuprisalToProb is false + double distanceWeightExponent; + + //if true, the values will be transformed from surprisal to probability; if false, will perform a distance transform + bool transformSuprisalToProb; + + //if ENT_QUERY_SELECT has a start offset + bool hasStartOffset; + + //ENT_QUERY_SELECT's value of the start offset + size_t startOffset; + + //if ENT_QUERY_SELECT or ENT_QUERY_SAMPLE has a random stream + bool hasRandomStream; + + //ENT_QUERY_SELECT's or ENT_QUERY_SAMPLE's random stream + RandomStream randomStream; + + //includes zero as a valid difference for ENT_QUERY_MIN_DIFFERENCE + bool includeZeroDifferences; + + //quantile percentage, for ENT_QUERY_QUANTILE + double qPercentage; + + //for ENT_QUERY_GENERALIZED_MEAN + double center; + bool calculateMoment; + bool absoluteValue; + + //indicates whether a compute result should be returned 
as a sorted list + bool returnSortedList; + + //for ENT_QUERY_NEAREST_GENERALIZED_DISTANCE and ENT_QUERY_WITHIN_GENERALIZED_DISTANCE, if returnSortedList is true, additionally return this label if valid + StringInternPool::StringID additionalSortedListLabel; + + //if conviction_of_removal is true, then it will compute the conviction as if the entities were removed, if false, will compute added or included + bool convictionOfRemoval; + + //if true, use concurrency if applicable + bool useConcurrency; +}; diff --git a/src/Amalgam/entity/EntityQueriesStatistics.h b/src/Amalgam/entity/EntityQueriesStatistics.h new file mode 100644 index 00000000..54819f80 --- /dev/null +++ b/src/Amalgam/entity/EntityQueriesStatistics.h @@ -0,0 +1,1001 @@ +#pragma once + +//project headers: +#include "FastMath.h" +#include "DistanceReferencePair.h" +#include "HashMaps.h" +#include "Opcodes.h" +#include "StringInternPool.h" + +//system headers: +#include + +//Contains templated functions that compute statistical queries on data sets +//If weights are used and are zero, then a zero weight will take precedence over infinite or nan values +class EntityQueriesStatistics +{ +public: + + //computes sum of values + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + template + static double Sum(EntityIterator first, EntityIterator last, + ValueFunction get_value, bool has_weight, WeightFunction get_weight) + { + double sum = 0.0; + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + sum += value; + } + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 0.0; + if(get_weight(i, weight_value)) + { + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + sum += weight_value * value; + } + else + sum += value; + } + } + } + + return sum; + } + + //computes mode of number values, and returns mode + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + template + static double ModeNumber(EntityIterator first, EntityIterator last, + ValueFunction get_value, bool has_weight, WeightFunction get_weight) + { + FastHashMap, DoubleNanHashComparator> value_weights; + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + auto [inserted_value, inserted] = value_weights.insert(std::make_pair(value, 1.0)); + if(!inserted) + inserted_value->second += 1.0; + } + } + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + auto [inserted_value, inserted] = value_weights.insert(std::make_pair(value, weight_value)); + if(!inserted) + inserted_value->second += weight_value; + } + } + } + + //find highest value + double mode = std::numeric_limits::quiet_NaN(); + double mode_weight = 0.0; + for(auto &[value, weight] : value_weights) + { + if(weight > mode_weight) + { + mode = value; + mode_weight = weight; + } + } + + return mode; + } + + //computes mode of string ids, and returns a tuple of whether a mode has been found, + // and if so, the mode + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + template + static std::pair 
ModeStringId(EntityIterator first, EntityIterator last, + ValueFunction get_value, bool has_weight, WeightFunction get_weight) + { + FastHashMap value_weights; + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + StringInternPool::StringID value = string_intern_pool.NOT_A_STRING_ID; + if(get_value(i, value)) + { + auto [inserted_value, inserted] = value_weights.insert(std::make_pair(value, 1.0)); + if(!inserted) + inserted_value->second += 1.0; + } + } + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + StringInternPool::StringID value = string_intern_pool.NOT_A_STRING_ID; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + auto [inserted_value, inserted] = value_weights.insert(std::make_pair(value, weight_value)); + if(!inserted) + inserted_value->second += weight_value; + } + } + } + + //find highest value + bool mode_found = false; + StringInternPool::StringID mode = string_intern_pool.NOT_A_STRING_ID; + double mode_weight = 0.0; + for(auto &[value, weight] : value_weights) + { + if(weight > mode_weight) + { + mode_found = true; + mode = value; + mode_weight = weight; + } + } + + return std::make_pair(mode_found, mode); + } + + //computes masses (weights) of each numeric value + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + template + static + FastHashMap, DoubleNanHashComparator> + ValueMassesNumber(EntityIterator first, EntityIterator last, size_t estimated_num_unique_values, + ValueFunction get_value, bool has_weight, WeightFunction get_weight) + { + FastHashMap, DoubleNanHashComparator> value_masses; + value_masses.reserve(estimated_num_unique_values); + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + auto [inserted_value, inserted] = value_masses.insert(std::make_pair(value, 1.0)); + if(!inserted) + inserted_value->second += 1.0; + } + } + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + auto [inserted_value, inserted] = value_masses.insert(std::make_pair(value, weight_value)); + if(!inserted) + inserted_value->second += weight_value; + } + } + } + + return value_masses; + } + + //computes masses (weights) of each string value + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + template + static + FastHashMap + ValueMassesStringId(EntityIterator first, EntityIterator last, size_t estimated_num_unique_values, + ValueFunction get_value, bool has_weight, WeightFunction get_weight) + { + FastHashMap value_masses; + value_masses.reserve(estimated_num_unique_values); + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + StringInternPool::StringID value; + if(get_value(i, value)) + { + auto [inserted_value, inserted] = value_masses.insert(std::make_pair(value, 1.0)); + if(!inserted) + inserted_value->second += 1.0; + } + } + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + StringInternPool::StringID value; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + auto [inserted_value, inserted] = value_masses.insert(std::make_pair(value, weight_value)); + if(!inserted) + inserted_value->second += weight_value; + } + } + } + + return value_masses; + } + + 
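[Editorial note: the statistics templates above never touch entities directly; they only see an iterator range plus get_value/get_weight callbacks that return false when a value or weight is absent, and a zero weight deliberately suppresses a value even if it is infinite. A minimal standalone sketch of that callback pattern over a plain std::vector follows; the data and the WeightedSum helper are hypothetical illustrations, not part of this diff.]

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    //weighted sum over elements [first, last), mirroring the callback style used above:
    // get_value/get_weight write through a reference and return false when the element has no value/weight;
    // a weight of exactly zero skips the element so that zero takes precedence over infinite values
    template<typename ValueFunction, typename WeightFunction>
    double WeightedSum(size_t first, size_t last, ValueFunction get_value, WeightFunction get_weight)
    {
        double sum = 0.0;
        for(size_t i = first; i != last; ++i)
        {
            double value = 0.0;
            if(!get_value(i, value))
                continue; //no value for this element

            double weight = 1.0;
            if(get_weight(i, weight) && weight == 0.0)
                continue; //zero weight dominates, even over infinities

            sum += weight * value;
        }
        return sum;
    }

    int main()
    {
        std::vector<double> values = {2.0, 4.0, 6.0};
        std::vector<double> weights = {1.0, 0.5, 0.0};

        double sum = WeightedSum(0, values.size(),
            [&](size_t i, double &v) { v = values[i]; return true; },
            [&](size_t i, double &w) { w = weights[i]; return true; });

        std::printf("weighted sum = %g\n", sum); //2*1 + 4*0.5 = 4
        return 0;
    }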
//computes the quantile of the values + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value. Otherwise, weight is 1. + //q_percentage is the quantile percentage to calculate + //values_buffer is a temporary buffer to hold data that can be reused + template + static double Quantile(EntityIterator first, EntityIterator last, + ValueFunction get_value, bool has_weight, WeightFunction get_weight, double q_percentage, + std::vector> &values_buffer) + { + //invalid range of quantile percentage + if(FastIsNaN(q_percentage) || q_percentage < 0.0 || q_percentage > 1.0) + return std::numeric_limits::quiet_NaN(); + + std::vector>& value_weights = values_buffer; + value_weights.clear(); + double total_weight = 0.0; + bool eq_or_no_weights = true; + + if(!has_weight) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + value_weights.push_back(std::make_pair(value, 1.0)); + total_weight += 1.0; + } + } + } + else + { + double weight_check = std::numeric_limits::quiet_NaN(); + + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + if(!FastIsNaN(weight_value)) + { + value_weights.push_back(std::make_pair(value, weight_value)); + total_weight += weight_value; + + //check to see if weights are different + if(FastIsNaN(weight_check)) + weight_check = weight_value; + else if(weight_check != weight_value) + eq_or_no_weights = false; + } + } + } + } + + //make sure have valid values and weights + if(value_weights.size() == 0 || total_weight == 0.0) + return std::numeric_limits::quiet_NaN(); + + //sorts on .first - value, not weight + std::sort(std::begin(value_weights), std::end(value_weights)); + + //early outs for edge cases + if(value_weights.size() == 1 || q_percentage == 0.0) + return value_weights.front().first; + else if(q_percentage == 1.0) + return value_weights.back().first; + + //search cumulative density for target quantile + const double first_cdf_term = 0.5 * value_weights.front().second; + const double last_cdf_term = total_weight - 0.5 * value_weights.front().second - 0.5 * value_weights.back().second; + double accum_weight = 0.0; + double cdf_term_prev = 0.0; + for(size_t i = 0; i < value_weights.size(); ++i) + { + const auto &[curr_value, curr_weight] = value_weights[i]; + + //calculate cdf term + double cdf_term = 0.0; + accum_weight += value_weights[i].second; + cdf_term += accum_weight - 0.5 * value_weights[i].second; + + //there are different ways in which to shift and normalize each individual cdf term, all of which + // produce mathematically correct quantiles (given a quantile is an interval, not a point). To be consistent + // with popular math packages for equal or no weighting, the normalization is a shift and scale based on the + // first and last cdf terms. For weighted samples, the standard normalization using total weight is used. 
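// Worked example (values assumed purely for illustration): with three equally weighted values {10, 20, 30},
// total_weight = 3 and the raw cdf terms are 0.5, 1.5, and 2.5. Then first_cdf_term = 0.5 and
// last_cdf_term = 3 - 0.5 - 0.5 = 2, so the shifted-and-scaled terms become 0, 0.5, and 1, and a
// q_percentage of 0.25 interpolates between 10 and 20 to return 15, matching the linear-interpolation
// quantile convention of common math packages.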
+ if(eq_or_no_weights) + { + cdf_term -= first_cdf_term; + cdf_term /= last_cdf_term; + } + else + { + cdf_term /= total_weight; + } + + //edge case for setting initial cdf term and returning first + // value if target quantile is smaller than cdf_term + if(i == 0) + { + cdf_term_prev = cdf_term; + if(q_percentage <= cdf_term) + return curr_value; + } + + //check for found quantile + if(q_percentage == cdf_term_prev) + return value_weights[i - 1].first; + else if(q_percentage == cdf_term) + return curr_value; + else if(cdf_term_prev < q_percentage && q_percentage < cdf_term) + { + const auto& prev_value = value_weights[i - 1].first; + + //linearly interpolate + return prev_value + (curr_value - prev_value) * (q_percentage - cdf_term_prev) / (cdf_term - cdf_term_prev); + } + + cdf_term_prev = cdf_term; + } + + //if didn't find (quantile percentage larger than last cdf term), use last element + return value_weights.back().first; + } + + //computes the generalized mean of the values where p_value is the parameter for the generalized mean + //center is the center the calculation is around, default is 0.0 + //if calculate_moment is true, the final calculation will not be raised to 1/p for p>=1 + //if absolute_value is true, the first order mean (p=1) will take the absolute value + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + //has separate paths for different values of p_value for efficiency + template + static double GeneralizedMean(EntityIterator first, EntityIterator last, + ValueFunction get_value, bool has_weight, WeightFunction get_weight, + double p_value, double center = 0.0, bool calculate_moment = false, bool absolute_value = false) + { + double mean = 0.0; + + if(!has_weight) + { + size_t num_elements = 0; + + if(p_value == 1.0) // arithmetic + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double diff = value - center; + mean += (absolute_value ? 
std::abs(diff) : diff); + num_elements++; + } + } + + mean /= num_elements; + } + else if(p_value == 2.0) // root mean square (quadratic) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double diff = value - center; + mean += diff * diff; + num_elements++; + } + } + + mean /= num_elements; + if(!calculate_moment) + mean = std::sqrt(mean); + } + else if(p_value == 0.0) // geometric + { + mean = 1.0; + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + mean *= (value - center); + num_elements++; + } + } + + if(!calculate_moment) + mean = std::pow(mean, 1.0 / num_elements); + } + else if(p_value == -1.0) // harmonic + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + mean += (1.0 / (value - center)); + num_elements++; + } + } + + mean /= num_elements; + if(!calculate_moment) + mean = (1.0 / mean); + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + mean += std::pow(value - center, p_value); + num_elements++; + } + } + + mean /= num_elements; + if(!calculate_moment) + mean = std::pow(mean, 1.0 / p_value); + } + } + else //use weights + { + double weights_sum = 0.0; + + if(p_value == 1.0) // arithmetic + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + { + mean += weight_value * (value - center); + weights_sum += weight_value; + } + } + } + + //can divide at the end because multiplication is associative and commutative + mean /= weights_sum; + } + else if(p_value == 2.0) // root mean square (quadratic) + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + { + double diff = value - center; + mean += weight_value * diff * diff; + weights_sum += weight_value; + } + } + } + + //can divide at the end because multiplication is associative and commutative + mean /= weights_sum; + if(!calculate_moment) + mean = std::sqrt(mean); + } + else if(p_value == 0.0) // geometric + { + //collect weights total first + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + weights_sum += weight_value; + } + } + + mean = 1.0; + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + mean *= std::pow(value - center, weight_value); + } + } + + if(!calculate_moment) + mean = std::pow(mean, 1.0 / weights_sum); + } + else if(p_value == -1.0) // harmonic + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + { + mean += weight_value / (value - center); + weights_sum += weight_value; + } + } + } + + //can divide at the end because 
multiplication is associative and commutative + mean /= weights_sum; + if(!calculate_moment) + mean = (1.0 / mean); + } + else + { + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + double weight_value = 1.0; + get_weight(i, weight_value); + + //don't multiply if zero in case value is infinite + if(weight_value != 0.0) + { + mean += weight_value * std::pow(value - center, p_value); + weights_sum += weight_value; + } + } + } + + //can divide at the end because multiplication is associative and commutative + mean /= weights_sum; + if(!calculate_moment) + mean = std::pow(mean, 1.0 / p_value); + } + } + + return mean; + } + + //computes the extreme difference between adjacent sorted values + //if select_min_value is true, will return the minimum difference, otherwise returns the maximum difference + //max_distance is the maximum distance anything can be (infinity is a valid value) + //if include_zero_distances is true, then it will include zero distance if that is the extreme value + //iterates from first to last, calling get_value + // if has_weight, then will use get_weight to obtain the weight of each value + // values_buffer is a temporary buffer to hold data that can be reused + template + static double ExtremeDifference(EntityIterator first, EntityIterator last, + ValueFunction get_value, + bool select_min_value, double max_distance, bool include_zero_distances, + std::vector &values_buffer) + { + std::vector &values = values_buffer; + values_buffer.clear(); + + for(EntityIterator i = first; i != last; ++i) + { + double value = 0.0; + if(get_value(i, value)) + { + //don't compare nulls (nans) because they don't contribute to finding an extreme difference + if(!FastIsNaN(value)) + values.push_back(value); + } + } + + //deal with edge cases + //if no values, then don't have any gaps + if(values.size() == 0) + return std::numeric_limits::quiet_NaN(); + + //if have one value, then infinite gap + if(values.size() == 1) + { + if(!FastIsNaN(max_distance)) + return std::numeric_limits::infinity(); + else + return max_distance; + } + + std::sort(begin(values), end(values)); + + double extreme_distance; + if(select_min_value) + { + extreme_distance = std::numeric_limits::infinity(); + for(size_t i = 0; i + 1 < values.size(); i++) + { + double delta = values[i + 1] - values[i]; + + //skip zeros if applicable + if(include_zero_distances && delta == 0) + continue; + + if(delta < extreme_distance) + extreme_distance = delta; + } + + if(!FastIsNaN(max_distance)) + { + double dist_between_ends = values[0] + std::max(0.0, max_distance - values[values.size() - 1]); + if(dist_between_ends < extreme_distance) + extreme_distance = dist_between_ends; + } + } + else //max value + { + extreme_distance = 0.0; + for(size_t i = 0; i + 1 < values.size(); i++) + { + double delta = values[i + 1] - values[i]; + if(delta > extreme_distance) + extreme_distance = delta; + } + + if(!FastIsNaN(max_distance)) + { + double dist_between_ends = values[0] + std::max(0.0, max_distance - values[values.size() - 1]); + if(dist_between_ends > extreme_distance) + extreme_distance = dist_between_ends; + } + } + + return extreme_distance; + } + + //holds parameters and transforms distances and surprisals + //EntityReference is the type of reference to an entity, and entity_reference is the reference itself + //index is the element number as sorted by smallest distance, where 0 is the entity with the smallest distance + //if surprisal_to_probability is true, it will transform surprisal to probability + //if 
surprisal_to_probability is false, distance_weight_exponent is the exponent each distance is raised to + //has_weight, if set, will use get_weight, taking in a function of an entity reference and a reference to an output + // double to set the weight, and should return true if the entity has a weight, false if not + template + class DistanceTransform + { + public: + constexpr DistanceTransform(bool surprisal_to_probability, double distance_weight_exponent, + bool has_weight, std::function get_weight) + { + distanceWeightExponent = distance_weight_exponent; + transformSuprisalToProb = surprisal_to_probability; + hasWeight = has_weight; + getEntityWeightFunction = get_weight; + } + + //transforms distances with regard to distance weight exponents, harmonic series, and entity weights as specified by parameters, + // transforming and updating the distances in entity_distance_pair_container in place + //EntityDistancePairContainer is the container for the entity-distance pairs, and EntityReference is the reference to the entity + //entity_distance_pair_container is the iterable container of the entity-distance pairs + //distance_weight_exponent is the exponent each distance is raised to + //has_weight, if set, will use get_weight, taking in a function of an entity reference and a reference to an output double to set the weight, + // and should return true if the entity has a weight, false if not + //sort_results, if set, will sort the results appropriately for the distance_weight_exponent, + // from smallest to largest if distance_weight_exponent is positive, largest to smallest otherwise + //get_entity returns the EntityReference for an iterator of EntityDistancePairContainer + //get_distance_ref returns a reference as a pointer to the location of the distance in the EntityDistancePairContainer + inline void TransformDistances(std::vector> &entity_distance_pair_container, bool sort_results) + { + if(transformSuprisalToProb) + { + //convert to surprisal + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + iter->distance = std::exp(-iter->distance); + + if(hasWeight) + { + //if weighted, need to weight by the logical OR of all probability masses + // this is complex to compute if done as P(A or B) = P(A) + P(B) - P(A and B), + //but is much more simple if computed as P(A or B) = 1 - ( (1 - P(A)) and (1 - P(B))) + //the latter is a multiplication, lending itself to raising to the power of the weight + //e.g., a weight of 2 is (1 - P(A))^2 + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + { + double weight = 1.0; + //if has a weight and not 1 (since 1 is fast) + if(getEntityWeightFunction(iter->reference, weight) && weight != 1.0) + { + if(weight != 0.0) + { + double prob_not_same = 1.0 - iter->distance; + double weighted_prob_not_same = std::pow(prob_not_same, weight); + iter->distance = 1.0 - weighted_prob_not_same; + } + else //weight of 0.0 + { + iter->distance = 0.0; + } + } + } + } + } + else //distance transform + { + if(distanceWeightExponent == -1) + { + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + iter->distance = 1.0 / iter->distance; + } + else if(distanceWeightExponent == 0) + { + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + iter->distance = 1.0; + } + else if(distanceWeightExponent != 1) + { + if(distanceWeightExponent >= 0) + { + 
for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + iter->distance = std::pow(iter->distance, distanceWeightExponent); + } + else //need special handling for zero distances to prevent NaN + { + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + { + if(iter->distance == 0.0) + iter->distance = std::numeric_limits::infinity(); + else + iter->distance = std::pow(iter->distance, distanceWeightExponent); + } + } + } + //else distanceWeightExponent == 1, which means just leave it + + if(hasWeight) + { + for(auto iter = begin(entity_distance_pair_container); iter != end(entity_distance_pair_container); ++iter) + { + double weight = 1.0; + if(getEntityWeightFunction(iter->reference, weight)) + { + if(weight != 0.0) + iter->distance *= weight; + else + iter->distance = 0.0; + } + } + } + } + + if(sort_results) + { + //if distance, sort by smallest first + if(!transformSuprisalToProb && distanceWeightExponent > 0) + { + std::sort(begin(entity_distance_pair_container), end(entity_distance_pair_container), + [](auto a, auto b) {return a.distance < b.distance; } + ); + } + else //inverse distance, sort by largest first + { + std::sort(begin(entity_distance_pair_container), end(entity_distance_pair_container), + [](auto a, auto b) {return a.distance > b.distance; } + ); + } + } + } + + //like TransformDistances but returns the appropriate expected value + template + inline double TransformDistancesToExpectedValue( + EntityDistancePairIterator entity_distance_pair_container_begin, + EntityDistancePairIterator entity_distance_pair_container_end) + { + if(transformSuprisalToProb) + { + //need to weight by the logical OR of all probability masses + // this is complex to compute if done as P(A or B) = P(A) + P(B) - P(A and B), + //but is much more simple if computed as P(A or B) = 1 - ( (1 - P(A)) and (1 - P(B))) + //the latter is a multiplication, additionally lending itself to raising to the power of the weight + //e.g., a weight of 2 is (1 - P(A))^2 + double prob_none_same = 1.0; + + if(hasWeight) + { + //convert to surprisal + for(auto iter = entity_distance_pair_container_begin; iter != entity_distance_pair_container_end; ++iter) + { + double prob_same = std::exp(-iter->distance); + double prob_not_same = 1.0 - prob_same; + + double weight = 1.0; + if(getEntityWeightFunction(iter->reference, weight) && weight != 1.0) + { + if(weight == 0.0) + continue; + + prob_not_same = std::pow(prob_not_same, weight); + } + + prob_none_same *= prob_not_same; + } + } + else //!hasWeight + { + //convert to surprisal + for(auto iter = entity_distance_pair_container_begin; iter != entity_distance_pair_container_end; ++iter) + { + double prob_same = std::exp(-iter->distance); + double prob_not_same = 1.0 - prob_same; + prob_none_same *= prob_not_same; + } + } + + double any_prob_same = 1 - prob_none_same; + return -std::log(any_prob_same); + } + else //distance transform + { + if(hasWeight) + { + return GeneralizedMean>::iterator>( + entity_distance_pair_container_begin, + entity_distance_pair_container_end, + [](typename std::vector>::iterator iter, double &value) + { value = iter->distance; return true; }, + true, + [this](typename std::vector>::iterator iter, double &weight) + { return getEntityWeightFunction(iter->reference, weight); }, + distanceWeightExponent); + } + else + { + return GeneralizedMean>::iterator>( + entity_distance_pair_container_begin, + entity_distance_pair_container_end, + 
[](typename std::vector>::iterator iter, double &value) + { value = iter->distance; return true; }, + false, + [](typename std::vector>::iterator iter, double &weight) + { return false; }, + distanceWeightExponent); + } + } + + } + + //Computes the distance contribution as a type of generalized mean with special handling for distances of zero + // entity is the entity that the distance contribution is being performed on, and entity_distance_pair_container are the distances to + // its nearest entities + // the functions get_entity and get_distance_ref return the entity and reference to the distance for an iterator of entity_distance_pair_container + double ComputeDistanceContribution(std::vector> &entity_distance_pair_container, EntityReference entity) + { + double distance_contribution = 0.0; + //there's at least one entity in question + size_t num_identical_entities = 1; + + auto entity_distance_begin = begin(entity_distance_pair_container); + auto entity_distance_iter = entity_distance_begin; + + //if no weight, can do a more streamlined process + if(!hasWeight) + { + //count the number of zero distances + for(; entity_distance_iter != end(entity_distance_pair_container); ++entity_distance_iter) + { + if(entity_distance_iter->distance != 0.0) + break; + + num_identical_entities++; + } + + distance_contribution = TransformDistancesToExpectedValue(entity_distance_iter, end(entity_distance_pair_container)); + + //split the distance contribution among the identical entities + return distance_contribution / num_identical_entities; + } + + double weight_of_identical_entities = 0.0; + + //count the number of zero distances and get the associated weight, + // since this weight isn't accounted for in the other distances + for(; entity_distance_iter != end(entity_distance_pair_container); ++entity_distance_iter) + { + if(entity_distance_iter->distance != 0.0) + break; + + double weight = 1.0; + if(getEntityWeightFunction(entity_distance_iter->reference, weight)) + weight_of_identical_entities += weight; + else + weight_of_identical_entities += 1.0; + } + + distance_contribution = TransformDistancesToExpectedValue(entity_distance_iter, end(entity_distance_pair_container)); + + //if no cases had any weight, distance contribution is 0 + if(FastIsNaN(distance_contribution)) + return 0.0; + + double entity_weight = 1.0; + if(getEntityWeightFunction(entity, entity_weight)) + { + if(entity_weight != 0) + distance_contribution *= entity_weight; + else + return 0.0; + } + + //split the distance contribution among the identical entities + return distance_contribution * entity_weight / (weight_of_identical_entities + entity_weight); + } + + //exponent by which to scale the distances + //only applicable when transformSuprisalToProb is false + double distanceWeightExponent; + + //if true, the values will be transformed from surprisal to probability; if false, will perform a distance transform + bool transformSuprisalToProb; + + //if hasWeight is true, then will call getEntityWeightFunction and apply the respective entity weight to each distance + bool hasWeight; + std::function getEntityWeightFunction; + }; +}; diff --git a/src/Amalgam/entity/EntityQueryBuilder.h b/src/Amalgam/entity/EntityQueryBuilder.h new file mode 100644 index 00000000..692d7dfc --- /dev/null +++ b/src/Amalgam/entity/EntityQueryBuilder.h @@ -0,0 +1,850 @@ +#pragma once + +//project headers: +#include "EntityQueries.h" +#include "EvaluableNode.h" +#include "StringInternPool.h" + +//Constructs a query engine query condition from Amalgam 
evaluable nodes +namespace EntityQueryBuilder +{ + //parameter indices for distance queries + enum DistParamIndices : size_t + { + MAX_TO_FIND_OR_MAX_DISTANCE, + POSITION_LABELS, + POSITION, + + WEIGHTS, + DISTANCE_TYPES, + ATTRIBUTES, + DEVIATIONS, + + //optional params + MINKOWSKI_PARAMETER, + DISTANCE_VALUE_TRANSFORM, + ENTITY_WEIGHT_LABEL_NAME, + RANDOM_SEED, + RADIUS_LABEL, + NUMERICAL_PRECISION, + + NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS //always last - do not add after this + }; + + constexpr bool DoesDistanceQueryUseEntitiesInsteadOfPosition(EvaluableNodeType type) + { + return (type == ENT_COMPUTE_ENTITY_CONVICTIONS + || type == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE + || type == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS + || type == ENT_COMPUTE_ENTITY_KL_DIVERGENCES); + } + + //populates the features of dist_params based on either num_elements or element_names for each of the + // four different attribute parameters based on its type (using num_elements if list or immediate, element_names if assoc) + inline void PopulateDistanceFeatureParameters(GeneralizedDistance &dist_params, + size_t num_elements, std::vector &element_names, + EvaluableNode *weights_node, EvaluableNode *distance_types_node, EvaluableNode *attributes_node, EvaluableNode *deviations_node) + { + dist_params.featureParams.resize(num_elements); + + //get weights + EvaluableNode::ConvertChildNodesAndStoreValue(weights_node, element_names, num_elements, + [&dist_params](size_t i, bool found, EvaluableNode *en) { + if(i < dist_params.featureParams.size()) + { + if(found) + dist_params.featureParams[i].weight = EvaluableNode::ToNumber(en); + else + dist_params.featureParams[i].weight = 1.0; + } + }); + + //get type + EvaluableNode::ConvertChildNodesAndStoreValue(distance_types_node, element_names, num_elements, + [&dist_params](size_t i, bool found, EvaluableNode *en) { + if(i < dist_params.featureParams.size()) + { + auto feature_type = FDT_CONTINUOUS_NUMERIC; + if(found) + { + StringInternPool::StringID feature_type_id = EvaluableNode::ToStringIDIfExists(en); + switch(feature_type_id) + { + case ENBISI_nominal: feature_type = FDT_NOMINAL; break; + case ENBISI_continuous: feature_type = FDT_CONTINUOUS_NUMERIC; break; + case ENBISI_cyclic: feature_type = FDT_CONTINUOUS_NUMERIC_CYCLIC; break; + case GetStringIdFromNodeTypeFromString(ENT_STRING): feature_type = FDT_CONTINUOUS_STRING; break; + case ENBISI_code: feature_type = FDT_CONTINUOUS_CODE; break; + default: feature_type = FDT_CONTINUOUS_NUMERIC; break; + } + } + dist_params.featureParams[i].featureType = feature_type; + } + }); + + //get attributes + EvaluableNode::ConvertChildNodesAndStoreValue(attributes_node, element_names, num_elements, + [&dist_params](size_t i, bool found, EvaluableNode *en) { + if(i < dist_params.featureParams.size()) + { + dist_params.featureParams[i].unknownToUnknownDifference = std::numeric_limits::quiet_NaN(); + dist_params.featureParams[i].knownToUnknownDifference = std::numeric_limits::quiet_NaN(); + + //get attributes based on feature type + switch(dist_params.featureParams[i].featureType) + { + case FDT_NOMINAL: + if(found && !EvaluableNode::IsNull(en)) + { + if(en->EvaluableNode::IsOrderedArray()) + { + auto &ocn = en->GetOrderedChildNodesReference(); + size_t ocn_size = ocn.size(); + if(ocn_size > 0) + dist_params.featureParams[i].typeAttributes.nominalCount = EvaluableNode::ToNumber(ocn[0]); + if(ocn_size > 1) + dist_params.featureParams[i].knownToUnknownDifference = EvaluableNode::ToNumber(ocn[1]); + if(ocn_size > 2) + 
dist_params.featureParams[i].unknownToUnknownDifference = EvaluableNode::ToNumber(ocn[2]); + } + else //treat as singular value + { + dist_params.featureParams[i].typeAttributes.nominalCount = EvaluableNode::ToNumber(en); + } + } + else + { + dist_params.featureParams[i].typeAttributes.nominalCount = 0.0; + } + break; + + case FDT_CONTINUOUS_NUMERIC_CYCLIC: + if(found && !EvaluableNode::IsNull(en)) + { + if(en->EvaluableNode::IsOrderedArray()) + { + auto &ocn = en->GetOrderedChildNodesReference(); + size_t ocn_size = ocn.size(); + if(ocn_size > 0) + dist_params.featureParams[i].typeAttributes.maxCyclicDifference = EvaluableNode::ToNumber(ocn[0]); + if(ocn_size > 1) + dist_params.featureParams[i].knownToUnknownDifference = EvaluableNode::ToNumber(ocn[1]); + if(ocn_size > 2) + dist_params.featureParams[i].unknownToUnknownDifference = EvaluableNode::ToNumber(ocn[2]); + } + else //treat as singular value + { + dist_params.featureParams[i].typeAttributes.maxCyclicDifference = EvaluableNode::ToNumber(en); + } + } + else //can't be cyclic without a range + { + dist_params.featureParams[i].featureType = FDT_CONTINUOUS_NUMERIC; + } + break; + + case FDT_CONTINUOUS_NUMERIC: + case FDT_CONTINUOUS_UNIVERSALLY_NUMERIC: + case FDT_CONTINUOUS_STRING: + case FDT_CONTINUOUS_CODE: + if(found && !EvaluableNode::IsNull(en)) + { + if(en->EvaluableNode::IsOrderedArray()) + { + auto &ocn = en->GetOrderedChildNodesReference(); + size_t ocn_size = ocn.size(); + if(ocn_size > 0) + dist_params.featureParams[i].knownToUnknownDifference = EvaluableNode::ToNumber(ocn[0]); + if(ocn_size > 1) + dist_params.featureParams[i].unknownToUnknownDifference = EvaluableNode::ToNumber(ocn[1]); + } + else //treat as singular value + { + dist_params.featureParams[i].knownToUnknownDifference = EvaluableNode::ToNumber(en); + } + } + break; + } + } + }); + + //get deviations + EvaluableNode::ConvertChildNodesAndStoreValue(deviations_node, element_names, num_elements, + [&dist_params](size_t i, bool found, EvaluableNode *en) { + if(i < dist_params.featureParams.size()) + { + if(found) + dist_params.featureParams[i].deviation = EvaluableNode::ToNumber(en); + else + dist_params.featureParams[i].deviation = 0.0; + } + }); + } + + + //interpret evaluable node as a distance query + inline void BuildDistanceCondition(EvaluableNode *cn, EvaluableNodeType condition_type, std::vector &conditions) + { + //cache ordered child nodes so don't need to keep fetching + auto &ocn = cn->GetOrderedChildNodes(); + + //need to at least have position, otherwise not valid query + if(ocn.size() <= POSITION) + return; + + //if ENT_QUERY_NEAREST_GENERALIZED_DISTANCE, see if excluding an entity in the previous query -- if so, exclude here + EntityQueryCondition *cur_condition = nullptr; + if(condition_type == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE && conditions.size() > 0 + && conditions.back().queryType == ENT_QUERY_NOT_IN_ENTITY_LIST && conditions.back().existLabels.size() == 1) + { + cur_condition = &(conditions.back()); + cur_condition->exclusionLabel = cur_condition->existLabels[0]; + cur_condition->existLabels.clear(); + } + else + { + //create a new condition for distance + conditions.emplace_back(); + cur_condition = &(conditions.back()); + + cur_condition->exclusionLabel = string_intern_pool.NOT_A_STRING_ID; + } + + //set query condition type + cur_condition->queryType = condition_type; + cur_condition->useConcurrency = cn->GetConcurrency(); + + //set maximum distance and max number of results (top_k) to find + if(condition_type == 
ENT_QUERY_WITHIN_GENERALIZED_DISTANCE) //maximum distance to search within + { + cur_condition->maxToRetrieve = std::numeric_limits::infinity(); + cur_condition->maxDistance = EvaluableNode::ToNumber(ocn[MAX_TO_FIND_OR_MAX_DISTANCE]); + if(FastIsNaN(cur_condition->maxDistance)) + cur_condition->maxDistance = 0; + } + else //infinite range query, use param as number to find (top_k) + { + cur_condition->maxToRetrieve = EvaluableNode::ToNumber(ocn[MAX_TO_FIND_OR_MAX_DISTANCE]); + if(FastIsNaN(cur_condition->maxToRetrieve)) + cur_condition->maxToRetrieve = 0; + cur_condition->maxDistance = std::numeric_limits::infinity(); + } + + //set position labels + EvaluableNode *position_labels = ocn[POSITION_LABELS]; + if(EvaluableNode::IsOrderedArray(position_labels)) + { + cur_condition->positionLabels.reserve(position_labels->GetOrderedChildNodes().size()); + for(auto &pl : position_labels->GetOrderedChildNodes()) + { + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDIfExists(pl); + if(Entity::IsLabelValidAndPublic(label_sid)) + cur_condition->positionLabels.push_back(label_sid); + else + cur_condition->queryType = ENT_NULL; + } + } + + //select based on type for position or entities + if(DoesDistanceQueryUseEntitiesInsteadOfPosition(condition_type)) + { + EvaluableNode *entities = ocn[POSITION]; + if(EvaluableNode::IsOrderedArray(entities)) + { + auto &entities_ocn = entities->GetOrderedChildNodesReference(); + cur_condition->existLabels.reserve(entities_ocn.size()); + for(auto &entity_en : entities_ocn) + cur_condition->existLabels.push_back(EvaluableNode::ToStringIDIfExists(entity_en)); + } + } + else + { + //set position + EvaluableNode *position = ocn[POSITION]; + if(EvaluableNode::IsOrderedArray(position) && (position->GetNumChildNodes() == cur_condition->positionLabels.size())) + { + auto &position_ocn = position->GetOrderedChildNodesReference(); + cur_condition->valueToCompare.reserve(position_ocn.size()); + cur_condition->valueTypes.reserve(position_ocn.size()); + for(auto &pos_en : position_ocn) + { + EvaluableNodeImmediateValue imm_val; + auto value_type = imm_val.CopyValueFromEvaluableNode(pos_en); + cur_condition->valueTypes.push_back(value_type); + cur_condition->valueToCompare.push_back(imm_val); + } + } + else // no positions given, default to nulls for each label + { + cur_condition->valueToCompare.reserve(cur_condition->positionLabels.size()); + cur_condition->valueTypes.reserve(cur_condition->positionLabels.size()); + for(size_t i = 0; i < cur_condition->positionLabels.size(); i++) + { + cur_condition->valueTypes.push_back(ENIVT_NULL); + cur_condition->valueToCompare.push_back(EvaluableNodeImmediateValue()); + } + } + } + //else don't bother parsing this, it instead contains the cases to compute case conviction for + + size_t num_elements = cur_condition->positionLabels.size(); + auto &dist_params = cur_condition->distParams; + + EvaluableNode *weights_node = nullptr; + if(ocn.size() > WEIGHTS) + weights_node = ocn[WEIGHTS]; + + EvaluableNode *distance_types_node = nullptr; + if(ocn.size() > DISTANCE_TYPES) + distance_types_node = ocn[DISTANCE_TYPES]; + + EvaluableNode *attributes_node = nullptr; + if(ocn.size() > ATTRIBUTES) + attributes_node = ocn[ATTRIBUTES]; + + EvaluableNode *deviations_node = nullptr; + if(ocn.size() > DEVIATIONS) + deviations_node = ocn[DEVIATIONS]; + + PopulateDistanceFeatureParameters(dist_params, num_elements, cur_condition->positionLabels, + weights_node, distance_types_node, attributes_node, deviations_node); + + //set minkowski 
parameter; default to 2.0 for Euclidean distance + double p_value = 2.0; + if(ocn.size() > MINKOWSKI_PARAMETER) + { + p_value = EvaluableNode::ToNumber(ocn[MINKOWSKI_PARAMETER]); + + //make sure valid value, if not, fall back to 2 + if(FastIsNaN(p_value) || p_value < 0) + p_value = 2; + } + cur_condition->distParams.pValue = p_value; + + //value transforms for whatever is measured as "distance" + cur_condition->transformSuprisalToProb = false; + cur_condition->distanceWeightExponent = 1.0; + if(ocn.size() > DISTANCE_VALUE_TRANSFORM) + { + EvaluableNode *dwe_param = ocn[DISTANCE_VALUE_TRANSFORM]; + if(!EvaluableNode::IsNull(dwe_param)) + { + if(dwe_param->GetType() == ENT_STRING && dwe_param->GetStringIDReference() == ENBISI_surprisal_to_prob) + cur_condition->transformSuprisalToProb = true; + else //try to convert to number + cur_condition->distanceWeightExponent = EvaluableNode::ToNumber(dwe_param, 1.0); + } + } + + cur_condition->weightLabel = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() > ENTITY_WEIGHT_LABEL_NAME) + cur_condition->weightLabel = EvaluableNode::ToStringIDIfExists(ocn[ENTITY_WEIGHT_LABEL_NAME]); + + //set random seed + std::string seed = ""; + if(ocn.size() > RANDOM_SEED) + seed = EvaluableNode::ToString(ocn[RANDOM_SEED]); + cur_condition->randomStream.SetState(seed); + + //set radius label + if(ocn.size() > RADIUS_LABEL) + cur_condition->singleLabel = EvaluableNode::ToStringIDIfExists(ocn[RADIUS_LABEL]); + else + cur_condition->singleLabel = StringInternPool::NOT_A_STRING_ID; + + //set numerical precision + cur_condition->distParams.highAccuracy = false; + cur_condition->distParams.recomputeAccurateDistances = true; + if(ocn.size() > NUMERICAL_PRECISION) + { + StringInternPool::StringID np_sid = EvaluableNode::ToStringIDIfExists(ocn[NUMERICAL_PRECISION]); + if(np_sid == ENBISI_precise) + { + cur_condition->distParams.highAccuracy = true; + cur_condition->distParams.recomputeAccurateDistances = false; + } + else if(np_sid == ENBISI_fast) + { + cur_condition->distParams.highAccuracy = false; + cur_condition->distParams.recomputeAccurateDistances = false; + } + //don't need to do anything for np_sid == ENBISI_recompute_precise because it's the default + } + + cur_condition->returnSortedList = false; + cur_condition->additionalSortedListLabel = string_intern_pool.NOT_A_STRING_ID; + if(condition_type == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE || condition_type == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || condition_type == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS) + { + if(ocn.size() > NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 0) + { + EvaluableNode *list_param = ocn[NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 0]; + cur_condition->returnSortedList = EvaluableNode::IsTrue(list_param); + if(!EvaluableNode::IsEmptyNode(list_param) && list_param->GetType() != ENT_TRUE && list_param->GetType() != ENT_FALSE) + cur_condition->additionalSortedListLabel = EvaluableNode::ToStringIDIfExists(list_param); + } + } + else if(condition_type == ENT_COMPUTE_ENTITY_CONVICTIONS || condition_type == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE || condition_type == ENT_COMPUTE_ENTITY_KL_DIVERGENCES) + { + cur_condition->convictionOfRemoval = false; + if(ocn.size() > NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 0) + cur_condition->convictionOfRemoval = EvaluableNode::IsTrue(ocn[NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 0]); + + if(condition_type == ENT_COMPUTE_ENTITY_CONVICTIONS || condition_type == ENT_COMPUTE_ENTITY_KL_DIVERGENCES) + { + if(ocn.size() > NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 1) + { + 
EvaluableNode *list_param = ocn[NUM_MINKOWSKI_DISTANCE_QUERY_PARAMETERS + 1]; + cur_condition->returnSortedList = EvaluableNode::IsTrue(list_param); + if(!EvaluableNode::IsEmptyNode(list_param) && list_param->GetType() != ENT_TRUE && list_param->GetType() != ENT_FALSE) + cur_condition->additionalSortedListLabel = EvaluableNode::ToStringIDIfExists(list_param); + } + } + } + + + //check if any of the positions are not valid + bool need_exist_query = false; + bool has_position_data = !DoesDistanceQueryUseEntitiesInsteadOfPosition(condition_type); + + //check for any disabled features (e.g., zero'd weight) + if(has_position_data) + { + for(size_t i = 0; i < cur_condition->distParams.featureParams.size(); i++) + { + if(!cur_condition->distParams.IsFeatureEnabled(i)) + { + need_exist_query = true; + break; + } + } + } + else //entities may have missing data, so need exist query + { + need_exist_query = true; + } + + if(need_exist_query) + { + //add exists query and swap, so the exists_condition is before cur_condition + conditions.emplace_back(); + EntityQueryCondition *exists_condition = &(conditions.back()); + + //need to re-retrieve the pointer in case there has been a reallocation via emplace_back + // don't get the end one just placed, get the one before that + cur_condition = &conditions[conditions.size() - 2]; + + //swap data and pointers + std::swap(*exists_condition, *cur_condition); + std::swap(exists_condition, cur_condition); + + exists_condition->queryType = ENT_QUERY_EXISTS; + //if has_position_data, then will add on those needed features below + // but if it doesn't, then need to include all labels + if(!has_position_data) + exists_condition->existLabels = cur_condition->positionLabels; + + //remove any 0 weighted features; if has_position_data, then move them to the exist query + // don't increment i here because if a feature is moved to the exists_condition, + // then a new feature is moved into that new index and that feature position needs to be rechecked + for(size_t i = 0; i < cur_condition->positionLabels.size();) + { + if(cur_condition->distParams.featureParams[i].weight == 0.0) + { + //only move/remove data if the right type of query + if(has_position_data) + { + exists_condition->existLabels.push_back(cur_condition->positionLabels[i]); + cur_condition->valueToCompare.erase(cur_condition->valueToCompare.begin() + i); + cur_condition->valueTypes.erase(cur_condition->valueTypes.begin() + i); + } + + cur_condition->positionLabels.erase(cur_condition->positionLabels.begin() + i); + cur_condition->distParams.featureParams.erase(begin(cur_condition->distParams.featureParams) + i); + continue; + } + + i++; + } + } + + //perform this last to make sure all changes are in + cur_condition->distParams.SetAndConstrainParams(); + } + + //builds a query condition from cn + inline void BuildNonDistanceCondition(EvaluableNode *cn, EvaluableNodeType type, + std::vector &conditions, EvaluableNodeManager &enm, RandomStream &rs) + { + auto &ocn = cn->GetOrderedChildNodes(); + + //validate number of parameters + switch(type) + { + case ENT_QUERY_BETWEEN: //all double parameter query types + case ENT_QUERY_NOT_BETWEEN: + if(ocn.size() < 3) + return; + break; + + case ENT_QUERY_LESS_OR_EQUAL_TO: + case ENT_QUERY_GREATER_OR_EQUAL_TO: + case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_EQUALS: + if(ocn.size() < 2) + return; + break; + + case ENT_QUERY_MIN: + case ENT_QUERY_MAX: + case ENT_QUERY_VALUE_MASSES: + if(ocn.size() < 1) + return; + break; + + default:; + } + + //next, determine if a new 
condition should be made, or reuse the current one + bool requires_new_condition = true; //if true, create a new condition rather than using current_condition + switch(type) + { + case ENT_QUERY_NOT_EXISTS: + case ENT_QUERY_EXISTS: + case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_EQUALS: + case ENT_QUERY_NOT_BETWEEN: + requires_new_condition = (conditions.size() == 0 || conditions.back().queryType != type); + break; + + case ENT_QUERY_BETWEEN: + case ENT_QUERY_GREATER_OR_EQUAL_TO: + case ENT_QUERY_LESS_OR_EQUAL_TO: + { + //these three are equivalent + if(conditions.size() > 0) + { + EvaluableNodeType prev_type = conditions.back().queryType; + if(prev_type == ENT_QUERY_BETWEEN || prev_type == ENT_QUERY_GREATER_OR_EQUAL_TO || prev_type == ENT_QUERY_LESS_OR_EQUAL_TO) + requires_new_condition = false; + } + break; + } + + default:; + } + + //create a new condition if needed + if(requires_new_condition) + { + //create new condition + conditions.emplace_back(); + conditions.back().queryType = type; + } + + auto cur_condition = &(conditions.back()); + cur_condition->singleLabel = 0; + + //get label sid - always the first child node + StringInternPool::StringID label_sid = (ocn.size() >= 1) ? EvaluableNode::ToStringIDIfExists(ocn[0]) : StringInternPool::NOT_A_STRING_ID; + + //switch to return if label sid is invalid + switch(type) + { + case ENT_QUERY_NOT_EXISTS: + case ENT_QUERY_EXISTS: + case ENT_QUERY_MIN: + case ENT_QUERY_MAX: + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + case ENT_QUERY_VALUE_MASSES: + case ENT_QUERY_LESS_OR_EQUAL_TO: + case ENT_QUERY_GREATER_OR_EQUAL_TO: + case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_EQUALS: + case ENT_QUERY_BETWEEN: + case ENT_QUERY_NOT_BETWEEN: + case ENT_QUERY_AMONG: + case ENT_QUERY_NOT_AMONG: + { + if(!Entity::IsLabelValidAndPublic(label_sid)) + { + cur_condition->queryType = ENT_NULL; + return; + } + break; + } + default:; + } + + //actually populate the condition parameters from the evaluable nodes + switch(type) + { + case ENT_QUERY_SELECT: + { + cur_condition->maxToRetrieve = (ocn.size() >= 1) ? EvaluableNode::ToNumber(ocn[0], 0.0) : 0; + + cur_condition->hasStartOffset = (ocn.size() >= 2); + cur_condition->startOffset = cur_condition->hasStartOffset ? static_cast(EvaluableNode::ToNumber(ocn[1], 0.0)) : 0; + + cur_condition->hasRandomStream = (ocn.size() >= 3 && !EvaluableNode::IsEmptyNode(ocn[2])); + if(cur_condition->hasRandomStream) + cur_condition->randomStream.SetState(EvaluableNode::ToString(ocn[2])); + + break; + } + case ENT_QUERY_SAMPLE: + { + cur_condition->maxToRetrieve = (ocn.size() > 0) ? EvaluableNode::ToNumber(ocn[0], 0.0) : 1; + cur_condition->hasRandomStream = (ocn.size() > 1 && !EvaluableNode::IsEmptyNode(ocn[1])); + if(cur_condition->hasRandomStream) + cur_condition->randomStream.SetState(EvaluableNode::ToString(ocn[1])); + else + cur_condition->randomStream = rs.CreateOtherStreamViaRand(); + break; + } + case ENT_QUERY_WEIGHTED_SAMPLE: + { + cur_condition->singleLabel = (ocn.size() > 0) ? EvaluableNode::ToStringIDIfExists(ocn[0]) : StringInternPool::NOT_A_STRING_ID; + cur_condition->maxToRetrieve = (ocn.size() > 1) ? 
EvaluableNode::ToNumber(ocn[1], 0.0) : 1; + cur_condition->hasRandomStream = (ocn.size() > 2 && !EvaluableNode::IsEmptyNode(ocn[2])); + if(cur_condition->hasRandomStream) + cur_condition->randomStream.SetState(EvaluableNode::ToString(ocn[2])); + else + cur_condition->randomStream = rs.CreateOtherStreamViaRand(); + break; + } + case ENT_QUERY_IN_ENTITY_LIST: + case ENT_QUERY_NOT_IN_ENTITY_LIST: + { + if(ocn.size() >= 1) + { + EvaluableNode *entity_sids = ocn[0]; + if(EvaluableNode::IsOrderedArray(entity_sids)) + { + cur_condition->existLabels.reserve(entity_sids->GetOrderedChildNodes().size()); + for(auto &esid : entity_sids->GetOrderedChildNodes()) + { + StringInternPool::StringID entity_sid = EvaluableNode::ToStringIDIfExists(esid); + cur_condition->existLabels.push_back(entity_sid); + } + } + } + break; + } + case ENT_QUERY_BETWEEN: + case ENT_QUERY_NOT_BETWEEN: + { + //number of parameters checked above + EvaluableNode *low_value = ocn[1]; + EvaluableNode *high_value = ocn[2]; + + //since types need to match, force both to the same type + if(EvaluableNode::IsNativelyNumeric(low_value) || EvaluableNode::IsNativelyNumeric(high_value)) + { + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair( + EvaluableNode::ToNumber(low_value), EvaluableNode::ToNumber(high_value)))); + + cur_condition->valueTypes.push_back(ENIVT_NUMBER); + } + else + { + StringInternPool::StringID low_sid = EvaluableNode::ToStringIDIfExists(low_value); + StringInternPool::StringID high_sid = EvaluableNode::ToStringIDIfExists(high_value); + + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair(low_sid, high_sid))); + + cur_condition->valueTypes.push_back(ENIVT_STRING_ID); + } + + break; + } + + case ENT_QUERY_AMONG: + case ENT_QUERY_NOT_AMONG: + { + cur_condition->singleLabel = label_sid; + + //already checked for nullptr above + auto &values_ocn = ocn[1]->GetOrderedChildNodes(); + for(auto value_node : values_ocn) + { + EvaluableNodeImmediateValue value; + auto value_type = value.CopyValueFromEvaluableNode(value_node); + cur_condition->valueToCompare.push_back(value); + cur_condition->valueTypes.push_back(value_type); + } + + break; + } + + case ENT_QUERY_NOT_EXISTS: + case ENT_QUERY_EXISTS: + { + //get label and append it if it is valid (otherwise don't match on anything) + if(ocn.size() >= 1) + cur_condition->existLabels.push_back(label_sid); + + break; + } + + case ENT_QUERY_MIN: + case ENT_QUERY_MAX: + { + cur_condition->singleLabel = label_sid; + + //default to retrieve 1 + cur_condition->maxToRetrieve = 1; + if(ocn.size() >= 2) + { + EvaluableNode *value = ocn[1]; + cur_condition->maxToRetrieve = EvaluableNode::ToNumber(value); + } + + if(ocn.size() <= 2 || EvaluableNode::IsTrue(ocn[2])) + cur_condition->singleLabelType = ENIVT_NUMBER; + else + cur_condition->singleLabelType = ENIVT_STRING_ID; + + break; + } + + case ENT_QUERY_LESS_OR_EQUAL_TO: + case ENT_QUERY_GREATER_OR_EQUAL_TO: + { + //these query types will be transformed into a between query, including the appropriate infinite + + //number of parameters checked above + EvaluableNode *compare_value = ocn[1]; + + if(EvaluableNode::IsNativelyNumeric(compare_value)) + { + if(type == ENT_QUERY_LESS_OR_EQUAL_TO) + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair( + -std::numeric_limits::infinity(), EvaluableNode::ToNumber(compare_value)))); + else + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair( + EvaluableNode::ToNumber(compare_value), 
std::numeric_limits::infinity()))); + + cur_condition->valueTypes.push_back(ENIVT_NUMBER); + } + else + { + if(type == ENT_QUERY_LESS_OR_EQUAL_TO) + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair( + string_intern_pool.NOT_A_STRING_ID, EvaluableNode::ToStringIDIfExists(compare_value)))); + else + cur_condition->pairedLabels.push_back(std::make_pair(label_sid, std::make_pair( + EvaluableNode::ToStringIDIfExists(compare_value), string_intern_pool.NOT_A_STRING_ID))); + + cur_condition->valueTypes.push_back(ENIVT_STRING_ID); + } + + cur_condition->queryType = ENT_QUERY_BETWEEN; + break; + } + + + case ENT_QUERY_NOT_EQUALS: + case ENT_QUERY_EQUALS: + { + EvaluableNodeImmediateValue value; + EvaluableNodeImmediateValueType value_type = value.CopyValueFromEvaluableNode(ocn[1]); + + cur_condition->valueTypes.push_back(value_type); + cur_condition->singleLabels.emplace_back(std::make_pair(label_sid, value)); + + break; + } + + case ENT_QUERY_MIN_DIFFERENCE: + cur_condition->singleLabel = label_sid; + + cur_condition->maxDistance = std::numeric_limits::quiet_NaN(); + if(ocn.size() >= 2) + cur_condition->maxDistance = EvaluableNode::ToNumber(ocn[1]); + + cur_condition->includeZeroDifferences = true; + if(ocn.size() >= 3) + cur_condition->includeZeroDifferences = EvaluableNode::IsTrue(ocn[2]); + break; + + case ENT_QUERY_MAX_DIFFERENCE: + cur_condition->singleLabel = label_sid; + + cur_condition->maxDistance = std::numeric_limits::quiet_NaN(); + if(ocn.size() >= 2) + cur_condition->maxDistance = EvaluableNode::ToNumber(ocn[1]); + + break; + + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_VALUE_MASSES: + { + cur_condition->singleLabel = label_sid; + + cur_condition->weightLabel = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() >= 2) + cur_condition->weightLabel = EvaluableNode::ToStringIDIfExists(ocn[1]); + + if(type == ENT_QUERY_MODE || type == ENT_QUERY_VALUE_MASSES) + { + if(ocn.size() <= 2 || EvaluableNode::IsTrue(ocn[2])) + cur_condition->singleLabelType = ENIVT_NUMBER; + else + cur_condition->singleLabelType = ENIVT_STRING_ID; + } + + break; + } + + case ENT_QUERY_QUANTILE: + { + cur_condition->singleLabel = label_sid; + + cur_condition->qPercentage = 0.5; + if(ocn.size() >= 2) + cur_condition->qPercentage = EvaluableNode::ToNumber(ocn[1]); + + cur_condition->weightLabel = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() >= 3) + cur_condition->weightLabel = EvaluableNode::ToStringIDIfExists(ocn[2]); + + break; + } + + case ENT_QUERY_GENERALIZED_MEAN: + { + cur_condition->singleLabel = label_sid; + + cur_condition->distParams.pValue = 1; + if(ocn.size() >= 2) + cur_condition->distParams.pValue = EvaluableNode::ToNumber(ocn[1]); + + cur_condition->weightLabel = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() >= 3) + cur_condition->weightLabel = EvaluableNode::ToStringIDIfExists(ocn[2]); + + cur_condition->center = 0.0; + if(ocn.size() >= 4) + cur_condition->center = EvaluableNode::ToNumber(ocn[3], 0.0); + + cur_condition->calculateMoment = false; + if(ocn.size() >= 5) + cur_condition->calculateMoment = EvaluableNode::IsTrue(ocn[4]); + + cur_condition->absoluteValue = false; + if(ocn.size() >= 6) + cur_condition->absoluteValue = EvaluableNode::IsTrue(ocn[5]); + + break; + } + + default:; + }//end switch + } +}; diff --git a/src/Amalgam/entity/EntityQueryCaches.cpp b/src/Amalgam/entity/EntityQueryCaches.cpp new file mode 100644 index 00000000..af53babc --- /dev/null +++ b/src/Amalgam/entity/EntityQueryCaches.cpp @@ -0,0 +1,868 @@ +//project 
headers: +#include "Conviction.h" +#include "EntityQueries.h" +#include "EntityQueryCaches.h" + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) +thread_local +#endif +EntityQueryCaches::QueryCachesBuffers EntityQueryCaches::buffers; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) +void EntityQueryCaches::EnsureLabelsAreCached(EntityQueryCondition *cond, Concurrency::ReadLock &lock) +#else +void EntityQueryCaches::EnsureLabelsAreCached(EntityQueryCondition *cond) +#endif +{ + //if there are any labels that need to be added, + // this will collect them to be added all at once + std::vector labels_to_add; + + //add label to cache if missing + switch(cond->queryType) + { + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: + case ENT_COMPUTE_ENTITY_CONVICTIONS: + case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + { + for(auto label : cond->positionLabels) + { + if(!DoesHaveLabel(label)) + labels_to_add.push_back(label); + } + + if(cond->weightLabel != StringInternPool::NOT_A_STRING_ID) + { + if(!DoesHaveLabel(cond->weightLabel)) + labels_to_add.push_back(cond->weightLabel); + } + + if(cond->additionalSortedListLabel != StringInternPool::NOT_A_STRING_ID) + { + if(!DoesHaveLabel(cond->additionalSortedListLabel)) + labels_to_add.push_back(cond->additionalSortedListLabel); + } + + break; + } + + case ENT_QUERY_WEIGHTED_SAMPLE: + case ENT_QUERY_AMONG: + case ENT_QUERY_NOT_AMONG: + case ENT_QUERY_MIN: + case ENT_QUERY_MAX: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + { + if(!DoesHaveLabel(cond->singleLabel)) + labels_to_add.push_back(cond->singleLabel); + + break; + } + + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_VALUE_MASSES: + { + if(!DoesHaveLabel(cond->singleLabel)) + labels_to_add.push_back(cond->singleLabel); + + if(cond->weightLabel != StringInternPool::NOT_A_STRING_ID) + { + if(!DoesHaveLabel(cond->weightLabel)) + labels_to_add.push_back(cond->weightLabel); + } + + break; + } + + case ENT_QUERY_EXISTS: + case ENT_QUERY_NOT_EXISTS: + { + for(auto label : cond->existLabels) + { + if(!DoesHaveLabel(label)) + labels_to_add.push_back(label); + } + break; + } + + case ENT_QUERY_EQUALS: + case ENT_QUERY_NOT_EQUALS: + { + for(auto &[label_id, _] : cond->singleLabels) + { + if(!DoesHaveLabel(label_id)) + labels_to_add.push_back(label_id); + } + break; + } + + default: + { + for(auto &[label_id, _] : cond->pairedLabels) + { + if(!DoesHaveLabel(label_id)) + labels_to_add.push_back(label_id); + } + } + } + + + if(labels_to_add.size() == 0) + return; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + lock.unlock(); + Concurrency::WriteLock write_lock(mutex); + + //now with write_lock, remove any labels that might have already been added by other threads + labels_to_add.erase(std::remove_if(begin(labels_to_add), end(labels_to_add), + [this](auto sid) { return DoesHaveLabel(sid); }), + end(labels_to_add)); + + //need to double-check to make sure that another thread didn't already rebuild + if(labels_to_add.size() > 0) +#endif + sbfds.AddLabels(labels_to_add, container->GetContainedEntities()); + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //release write lock and reacquire read lock + write_lock.unlock(); + lock.lock(); +#endif +} + +void 
EntityQueryCaches::GetMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, + std::vector> &compute_results, bool is_first, bool update_matching_entities) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(mutex); + EnsureLabelsAreCached(cond, lock); +#else + EnsureLabelsAreCached(cond); +#endif + + switch(cond->queryType) + { + case ENT_QUERY_EXISTS: + { + for(auto label : cond->existLabels) + { + if(is_first) + { + sbfds.FindAllEntitiesWithFeature(label, matching_entities); + is_first = false; + } + else + sbfds.IntersectEntitiesWithFeature(label, matching_entities); + } + return; + } + + case ENT_QUERY_NOT_EXISTS: + { + for(auto label : cond->existLabels) + { + if(is_first) + { + sbfds.FindAllEntitiesWithoutFeature(label, matching_entities); + is_first = false; + } + else + sbfds.IntersectEntitiesWithoutFeature(label, matching_entities); + } + return; + } + + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + case ENT_COMPUTE_ENTITY_CONVICTIONS: + case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: + { + //get entity (case) weighting if applicable + bool use_entity_weights = (cond->weightLabel != StringInternPool::NOT_A_STRING_ID); + size_t weight_column = std::numeric_limits::max(); + if(use_entity_weights) + weight_column = sbfds.GetColumnIndexFromLabelId(cond->weightLabel); + + auto get_weight = sbfds.GetNumberValueFromEntityIndexFunction(weight_column); + EntityQueriesStatistics::DistanceTransform distance_transform(cond->transformSuprisalToProb, + cond->distanceWeightExponent, use_entity_weights, get_weight); + + if(cond->queryType == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || cond->queryType == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE) + { + //labels and values must have the same size + if(cond->valueToCompare.size() != cond->positionLabels.size()) + { + matching_entities.clear(); + return; + } + + //if first, need to populate with all entities + if(is_first) + { + matching_entities.clear(); + matching_entities.SetAllIds(sbfds.GetNumInsertedEntities()); + } + + //if no position labels, then the weight must be zero so just randomly choose k + if(cond->positionLabels.size() == 0) + { + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp = matching_entities; + matching_entities.clear(); + + auto rand_stream = cond->randomStream.CreateOtherStreamViaRand(); + + //insert each case with a distance of zero, since the weight must have been zero to get here + size_t num_to_retrieve = std::min(static_cast(cond->maxToRetrieve), temp.size()); + for(size_t i = 0; i < num_to_retrieve; i++) + { + size_t rand_index = temp.GetRandomElement(rand_stream); + temp.erase(rand_index); + matching_entities.insert(rand_index); + compute_results.emplace_back(0.0, rand_index); + } + } + else if(cond->queryType == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE) + { + sbfds.FindNearestEntities(cond->distParams, cond->positionLabels, cond->valueToCompare, cond->valueTypes, + static_cast(cond->maxToRetrieve), cond->exclusionLabel, matching_entities, + compute_results, cond->randomStream.CreateOtherStreamViaRand()); + } + else //ENT_QUERY_WITHIN_GENERALIZED_DISTANCE + { + sbfds.FindEntitiesWithinDistance(cond->distParams, cond->positionLabels, cond->valueToCompare, cond->valueTypes, + cond->maxDistance, matching_entities, compute_results); + } + + 
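// Illustrative note (example values assumed, not taken from the source): TransformDistances rescales each
// raw distance in compute_results in place. For instance, with distanceWeightExponent = -1, distances
// {0.5, 2.0} become {2.0, 0.5} and, when sorting is requested, are ordered largest first; with
// transformSuprisalToProb set, a surprisal d instead becomes the probability exp(-d), optionally
// reweighted per entity via its weight label.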
distance_transform.TransformDistances(compute_results, cond->returnSortedList); + + //populate matching_entities if needed + if(update_matching_entities) + { + matching_entities.clear(); + for(auto &it : compute_results) + matching_entities.insert(it.reference); + } + } + else //cond->queryType == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS or ENT_COMPUTE_ENTITY_CONVICTIONS or ENT_COMPUTE_ENTITY_KL_DIVERGENCES or ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE + { + size_t total_contained_entities = sbfds.GetNumInsertedEntities(); + if(total_contained_entities == 0) + return; + + //if there are no existLabels, or number of existLabels is same as the number of entities in cache, we don't compute on subset + const bool compute_on_subset = (cond->existLabels.size() != 0 && cond->existLabels.size() < total_contained_entities); + + size_t top_k = std::min(static_cast(cond->maxToRetrieve), total_contained_entities); + + BitArrayIntegerSet *ents_to_compute_ptr = nullptr; //if nullptr, compute is done on all entities in the cache + + if(compute_on_subset) //if subset is specified, set ents_to_compute_ptr to set of ents_to_compute + { + ents_to_compute_ptr = &buffers.tempMatchingEntityIndices; + ents_to_compute_ptr->clear(); + + if(cond->queryType == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE) + { + //determine the base entities by everything not in the list + *ents_to_compute_ptr = matching_entities; + + for(auto entity_sid : cond->existLabels) + { + size_t entity_index = container->GetContainedEntityIndex(entity_sid); + ents_to_compute_ptr->erase(entity_index); + } + } + else + { + for(auto entity_sid : cond->existLabels) + { + size_t entity_index = container->GetContainedEntityIndex(entity_sid); + if(entity_index != std::numeric_limits::max()) + ents_to_compute_ptr->insert(entity_index); + } + + //make sure everything asked to be computed is in the base set of entities + ents_to_compute_ptr->Intersect(matching_entities); + } + } + else //compute on all + { + ents_to_compute_ptr = &matching_entities; + } + + //only select cases that have all of the correct features + for(auto i : cond->positionLabels) + sbfds.IntersectEntitiesWithFeature(i, *ents_to_compute_ptr); + + #ifdef MULTITHREAD_SUPPORT + ConvictionProcessor conviction_processor(buffers.convictionBuffers, + buffers.knnCache, distance_transform, top_k, cond->useConcurrency); + #else + ConvictionProcessor conviction_processor(buffers.convictionBuffers, + buffers.knnCache, distance_transform, top_k); + #endif + buffers.knnCache.ResetCache(sbfds, matching_entities, cond->distParams, cond->positionLabels); + + auto &results_buffer = buffers.doubleVector; + results_buffer.clear(); + + if(cond->queryType == ENT_COMPUTE_ENTITY_CONVICTIONS) + { + conviction_processor.ComputeCaseKLDivergences(*ents_to_compute_ptr, results_buffer, true, cond->convictionOfRemoval); + } + else if(cond->queryType == ENT_COMPUTE_ENTITY_KL_DIVERGENCES) + { + conviction_processor.ComputeCaseKLDivergences(*ents_to_compute_ptr, results_buffer, false, cond->convictionOfRemoval); + } + else if(cond->queryType == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE) + { + double group_conviction = conviction_processor.ComputeCaseGroupKLDivergence(*ents_to_compute_ptr, cond->convictionOfRemoval); + + compute_results.clear(); + compute_results.emplace_back(group_conviction, 0); + + //early exit because don't need to translate distances + return; + } + else //ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS + { + conviction_processor.ComputeDistanceContributions(ents_to_compute_ptr, results_buffer); + } + + //clear 
compute_results as it may have been used for intermediate results + compute_results.clear(); + if(ents_to_compute_ptr == nullptr) + { + //computed on globals, so convert results to global coordinates paired with their contributions + compute_results.reserve(results_buffer.size()); + + for(size_t i = 0; i < results_buffer.size(); i++) + compute_results.emplace_back(results_buffer[i], i); + } + else //computed on a subset; use ents_to_compute_ptr because don't know what it points to + { + compute_results.reserve(ents_to_compute_ptr->size()); + size_t i = 0; + for(const auto &ent_index : *ents_to_compute_ptr) + compute_results.emplace_back(results_buffer[i++], ent_index); + } + + if(cond->returnSortedList) + { + std::sort(begin(compute_results), end(compute_results), + [](auto a, auto b) {return a.distance < b.distance; } + ); + } + } + + break; + } + + case ENT_QUERY_EQUALS: + { + bool first_feature = is_first; + + //loop over all features + for(size_t i = 0; i < cond->singleLabels.size(); i++) + { + auto &[label_id, compare_value] = cond->singleLabels[i]; + auto compare_type = cond->valueTypes[i]; + + if(first_feature) + { + matching_entities.clear(); + sbfds.UnionAllEntitiesWithValue(label_id, compare_type, compare_value, matching_entities); + first_feature = false; + } + else //get corresponding indices and intersect with results + { + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp.clear(); + sbfds.UnionAllEntitiesWithValue(label_id, compare_type, compare_value, temp); + matching_entities.Intersect(temp); + } + } + + break; + } + + case ENT_QUERY_NOT_EQUALS: + { + bool first_feature = is_first; + + //loop over all features + for(size_t i = 0; i < cond->singleLabels.size(); i++) + { + auto &[label_id, compare_value] = cond->singleLabels[i]; + auto compare_type = cond->valueTypes[i]; + + if(first_feature) + { + matching_entities.clear(); + sbfds.FindAllEntitiesWithFeature(label_id, matching_entities); + first_feature = false; + } + + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp.clear(); + sbfds.UnionAllEntitiesWithValue(label_id, compare_type, compare_value, temp); + matching_entities.EraseInBatch(temp); + } + matching_entities.UpdateNumElements(); + + break; + } + + case ENT_QUERY_BETWEEN: + case ENT_QUERY_NOT_BETWEEN: + { + bool first_feature = is_first; + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + + //loop over all features + for(size_t i = 0; i < cond->pairedLabels.size(); i++) + { + auto label_id = cond->pairedLabels[i].first; + auto &[low_value, high_value] = cond->pairedLabels[i].second; + + if(first_feature) + { + sbfds.FindAllEntitiesWithinRange(label_id, cond->valueTypes[i], + low_value, high_value, matching_entities, cond->queryType == ENT_QUERY_BETWEEN); + first_feature = false; + } + else //get corresponding indices and intersect with results + { + temp.clear(); + sbfds.FindAllEntitiesWithinRange(label_id, cond->valueTypes[i], + low_value, high_value, temp, cond->queryType == ENT_QUERY_BETWEEN); + matching_entities.Intersect(temp); + } + } + + break; + } + + case ENT_QUERY_MIN: + case ENT_QUERY_MAX: + { + size_t max_to_retrieve = static_cast(cond->maxToRetrieve); + + if(is_first) + { + sbfds.FindMinMax(cond->singleLabel, cond->singleLabelType, max_to_retrieve, + (cond->queryType == ENT_QUERY_MAX), nullptr, matching_entities); + } + else + { + //move data to temp and compute into matching_entities + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp = matching_entities; + 
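+			//Why the copy above: FindMinMax below writes its output into matching_entities, so the current
+			// candidate set is first saved into the reusable temp buffer and passed back in as the subset
+			// to search. E.g., if temp held three entities and max_to_retrieve were 2 for ENT_QUERY_MAX,
+			// matching_entities would presumably end up with the two of those three whose label value is
+			// largest (the counts here are illustrative, not from this change).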
matching_entities.clear(); + sbfds.FindMinMax(cond->singleLabel, cond->singleLabelType, max_to_retrieve, + (cond->queryType == ENT_QUERY_MAX), &temp, matching_entities); + } + break; + } + + case ENT_QUERY_AMONG: + { + if(is_first) + { + for(size_t i = 0; i < cond->valueToCompare.size(); i++) + sbfds.UnionAllEntitiesWithValue(cond->singleLabel, cond->valueTypes[i], cond->valueToCompare[i], matching_entities); + } + else + { + //get set of entities that are valid + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp.clear(); + for(size_t i = 0; i < cond->valueToCompare.size(); i++) + sbfds.UnionAllEntitiesWithValue(cond->singleLabel, cond->valueTypes[i], cond->valueToCompare[i], temp); + + //only keep those that have a matching value + matching_entities.Intersect(temp); + } + + break; + } + + case ENT_QUERY_NOT_AMONG: + { + //ensure that the feature exists + if(is_first) + sbfds.FindAllEntitiesWithFeature(cond->singleLabel, matching_entities); + else + sbfds.IntersectEntitiesWithFeature(cond->singleLabel, matching_entities); + + BitArrayIntegerSet &temp = buffers.tempMatchingEntityIndices; + temp.clear(); + //get set of entities that are valid + for(size_t i = 0; i < cond->valueToCompare.size(); i++) + sbfds.UnionAllEntitiesWithValue(cond->singleLabel, cond->valueTypes[i], cond->valueToCompare[i], temp); + + //only keep those that have a matching value + matching_entities.erase(temp); + + break; + } + + case ENT_QUERY_SUM: + case ENT_QUERY_MODE: + case ENT_QUERY_QUANTILE: + case ENT_QUERY_GENERALIZED_MEAN: + case ENT_QUERY_MIN_DIFFERENCE: + case ENT_QUERY_MAX_DIFFERENCE: + { + size_t column_index = sbfds.GetColumnIndexFromLabelId(cond->singleLabel); + if(column_index == std::numeric_limits::max()) + { + compute_results.emplace_back(std::numeric_limits::quiet_NaN(), 0); + return; + } + + size_t weight_column_index = sbfds.GetColumnIndexFromLabelId(cond->weightLabel); + bool has_weight = false; + if(weight_column_index != std::numeric_limits::max()) + has_weight = true; + else //just use a valid column + weight_column_index = 0; + + double result = 0.0; + + if(is_first) + { + EfficientIntegerSet &entities = sbfds.GetEntitiesWithValidNumbers(column_index); + auto get_value = sbfds.GetNumberValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + + switch(cond->queryType) + { + case ENT_QUERY_SUM: + result = EntityQueriesStatistics::Sum(entities.begin(), entities.end(), get_value, has_weight, get_weight); + break; + + case ENT_QUERY_MODE: + result = EntityQueriesStatistics::ModeNumber(entities.begin(), entities.end(), get_value, has_weight, get_weight); + break; + + case ENT_QUERY_QUANTILE: + result = EntityQueriesStatistics::Quantile(entities.begin(), entities.end(), get_value, + has_weight, get_weight, cond->qPercentage, EntityQueryCaches::buffers.pairDoubleVector); + break; + + case ENT_QUERY_GENERALIZED_MEAN: + result = EntityQueriesStatistics::GeneralizedMean(entities.begin(), entities.end(), get_value, + has_weight, get_weight, cond->distParams.pValue, cond->center, cond->calculateMoment, cond->absoluteValue); + break; + + case ENT_QUERY_MIN_DIFFERENCE: + result = EntityQueriesStatistics::ExtremeDifference(entities.begin(), entities.end(), get_value, true, + cond->maxDistance, cond->includeZeroDifferences, EntityQueryCaches::buffers.doubleVector); + break; + + case ENT_QUERY_MAX_DIFFERENCE: + result = EntityQueriesStatistics::ExtremeDifference(entities.begin(), entities.end(), get_value, 
false, + cond->maxDistance, cond->includeZeroDifferences, EntityQueryCaches::buffers.doubleVector); + break; + + default: + break; + } + } + else + { + auto get_value = sbfds.GetNumberValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + + switch(cond->queryType) + { + case ENT_QUERY_SUM: + result = EntityQueriesStatistics::Sum(matching_entities.begin(), matching_entities.end(), get_value, has_weight, get_weight); + break; + + case ENT_QUERY_MODE: + result = EntityQueriesStatistics::ModeNumber(matching_entities.begin(), matching_entities.end(), get_value, has_weight, get_weight); + break; + + case ENT_QUERY_QUANTILE: + result = EntityQueriesStatistics::Quantile(matching_entities.begin(), matching_entities.end(), get_value, + has_weight, get_weight, cond->qPercentage, EntityQueryCaches::buffers.pairDoubleVector); + break; + + case ENT_QUERY_GENERALIZED_MEAN: + result = EntityQueriesStatistics::GeneralizedMean(matching_entities.begin(), matching_entities.end(), get_value, + has_weight, get_weight, cond->distParams.pValue, cond->center, cond->calculateMoment, cond->absoluteValue); + break; + + case ENT_QUERY_MIN_DIFFERENCE: + result = EntityQueriesStatistics::ExtremeDifference(matching_entities.begin(), matching_entities.end(), get_value, true, + cond->maxDistance, cond->includeZeroDifferences, EntityQueryCaches::buffers.doubleVector); + break; + + case ENT_QUERY_MAX_DIFFERENCE: + result = EntityQueriesStatistics::ExtremeDifference(matching_entities.begin(), matching_entities.end(), get_value, false, + cond->maxDistance, cond->includeZeroDifferences, EntityQueryCaches::buffers.doubleVector); + break; + + default: + break; + } + } + + compute_results.emplace_back(result, 0); + return; + } + + default: // Other Enum value not handled + { + break; + } + } +} + +bool EntityQueryCaches::ComputeValueFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, + StringInternPool::StringID &compute_result, bool is_first) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(mutex); + EnsureLabelsAreCached(cond, lock); +#else + EnsureLabelsAreCached(cond); +#endif + + switch(cond->queryType) + { + case ENT_QUERY_MODE: + { + size_t column_index = sbfds.GetColumnIndexFromLabelId(cond->singleLabel); + if(column_index == std::numeric_limits::max()) + return false; + + size_t weight_column_index = sbfds.GetColumnIndexFromLabelId(cond->weightLabel); + bool has_weight = false; + if(weight_column_index != std::numeric_limits::max()) + has_weight = true; + else //just use a valid column + weight_column_index = 0; + + if(is_first) + { + EfficientIntegerSet &entities = sbfds.GetEntitiesWithValidStringIds(column_index); + auto get_value = sbfds.GetStringIdValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + auto [found, mode_id] = EntityQueriesStatistics::ModeStringId( + entities.begin(), entities.end(), get_value, has_weight, get_weight); + + compute_result = mode_id; + return found; + } + else + { + auto get_value = sbfds.GetStringIdValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + auto [found, mode_id] = EntityQueriesStatistics::ModeStringId( + matching_entities.begin(), matching_entities.end(), get_value, has_weight, get_weight); + + compute_result = mode_id; + return found; + } + } + 
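+	//ENT_QUERY_MODE is the only query type that yields a string result here; every other type falls
+	// through to the default below and the function returns false, telling the caller that no string
+	// value was computed. A hedged usage sketch (variable names illustrative only):
+	//
+	//	StringInternPool::StringID mode_sid;
+	//	if(caches->ComputeValueFromMatchingEntities(&cond, matching, mode_sid, /*is_first*/ true))
+	//		; //mode_sid now holds the (weighted) modal string value of cond.singleLabel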
default: + break; + } + + return false; +} + +void EntityQueryCaches::ComputeValuesFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, + FastHashMap, DoubleNanHashComparator> &compute_results, bool is_first) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(mutex); + EnsureLabelsAreCached(cond, lock); +#else + EnsureLabelsAreCached(cond); +#endif + + switch(cond->queryType) + { + case ENT_QUERY_VALUE_MASSES: + { + size_t column_index = sbfds.GetColumnIndexFromLabelId(cond->singleLabel); + if(column_index == std::numeric_limits::max()) + return; + + size_t weight_column_index = sbfds.GetColumnIndexFromLabelId(cond->weightLabel); + bool has_weight = false; + if(weight_column_index != std::numeric_limits::max()) + has_weight = true; + else //just use a valid column + weight_column_index = 0; + + size_t num_unique_values = sbfds.GetNumUniqueValuesForColumn(column_index, ENIVT_NUMBER); + + if(is_first) + { + EfficientIntegerSet &entities = sbfds.GetEntitiesWithValidNumbers(column_index); + auto get_value = sbfds.GetNumberValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + compute_results = EntityQueriesStatistics::ValueMassesNumber(entities.begin(), entities.end(), + num_unique_values, get_value, has_weight, get_weight); + } + else + { + auto get_value = sbfds.GetNumberValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + compute_results = EntityQueriesStatistics::ValueMassesNumber(matching_entities.begin(), matching_entities.end(), + num_unique_values, get_value, has_weight, get_weight); + } + return; + } + default: + break; + } +} + +void EntityQueryCaches::ComputeValuesFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, + FastHashMap &compute_results, bool is_first) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(mutex); + EnsureLabelsAreCached(cond, lock); +#else + EnsureLabelsAreCached(cond); +#endif + + switch(cond->queryType) + { + case ENT_QUERY_VALUE_MASSES: + { + size_t column_index = sbfds.GetColumnIndexFromLabelId(cond->singleLabel); + if(column_index == std::numeric_limits::max()) + return; + + size_t weight_column_index = sbfds.GetColumnIndexFromLabelId(cond->weightLabel); + bool has_weight = false; + if(weight_column_index != std::numeric_limits::max()) + has_weight = true; + else //just use a valid column + weight_column_index = 0; + + size_t num_unique_values = sbfds.GetNumUniqueValuesForColumn(column_index, ENIVT_STRING_ID); + + if(is_first) + { + EfficientIntegerSet &entities = sbfds.GetEntitiesWithValidStringIds(column_index); + auto get_value = sbfds.GetStringIdValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + compute_results = EntityQueriesStatistics::ValueMassesStringId(entities.begin(), entities.end(), + num_unique_values, get_value, has_weight, get_weight); + } + else + { + auto get_value = sbfds.GetStringIdValueFromEntityIteratorFunction(column_index); + auto get_weight = sbfds.GetNumberValueFromEntityIteratorFunction(weight_column_index); + compute_results = EntityQueriesStatistics::ValueMassesStringId(matching_entities.begin(), matching_entities.end(), + num_unique_values, get_value, has_weight, get_weight); + } + + return; + } + default: + 
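+	//Only ENT_QUERY_VALUE_MASSES is handled by this overload (number-valued labels); the string-id
+	// overload below mirrors it for string-valued labels. "Value masses" here appears to be the total
+	// weight per unique value: e.g., label values {2, 2, 5} with unit weights would map to
+	// {2 -> 2.0, 5 -> 1.0}, and with a weightLabel the per-entity weights would be summed instead
+	// (example data is hypothetical).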
break; + } +} + +void EntityQueryCaches::GetMatchingEntitiesViaSamplingWithReplacement(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, std::vector &entity_indices_sampled, bool is_first, bool update_matching_entities) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(mutex); + EnsureLabelsAreCached(cond, lock); +#else + EnsureLabelsAreCached(cond); +#endif + + size_t num_to_sample = static_cast(cond->maxToRetrieve); + + auto &probabilities = EntityQueryCaches::buffers.doubleVector; + auto &entity_indices = EntityQueryCaches::buffers.entityIndices; + + if(is_first) + sbfds.FindAllEntitiesWithValidNumbers(cond->singleLabel, matching_entities, entity_indices, probabilities); + else + sbfds.IntersectEntitiesWithValidNumbers(cond->singleLabel, matching_entities, entity_indices, probabilities); + + //don't attempt to continue if no elements + if(matching_entities.size() == 0) + return; + + if(update_matching_entities) + matching_entities.clear(); + + NormalizeProbabilities(probabilities); + + //if not sampling many, then brute force it + if(num_to_sample < 10) + { + //sample the entities + for(size_t i = 0; i < num_to_sample; i++) + { + size_t selected_entity_index = WeightedDiscreteRandomSample(probabilities, cond->randomStream); + auto eid = entity_indices[selected_entity_index]; + + if(update_matching_entities) + matching_entities.insert(eid); + else + entity_indices_sampled.push_back(eid); + } + } + else //sampling a bunch, better to precompute and use faster method + { + //a table for quickly generating entity indices based on weights + WeightedDiscreteRandomStreamTransform> ewt(entity_indices, probabilities, false); + + //sample the entities + for(size_t i = 0; i < num_to_sample; i++) + { + auto eid = ewt.WeightedDiscreteRand(cond->randomStream); + + if(update_matching_entities) + matching_entities.insert(eid); + else + entity_indices_sampled.push_back(eid); + } + } +} + +bool EntityQueryCaches::DoesCachedConditionMatch(EntityQueryCondition *cond, bool last_condition) +{ + EvaluableNodeType qt = cond->queryType; + + if(qt == ENT_QUERY_NEAREST_GENERALIZED_DISTANCE || qt == ENT_QUERY_WITHIN_GENERALIZED_DISTANCE || qt == ENT_COMPUTE_ENTITY_CONVICTIONS + || qt == ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE || qt == ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS || qt == ENT_COMPUTE_ENTITY_KL_DIVERGENCES) + { + //does not allow radii + if(cond->singleLabel != StringInternPool::NOT_A_STRING_ID) + return false; + + //TODO 4948: sbfds does not fully support p0 acceleration; it requires templating and calling logs of differences, then performing an inverse transform at the end + if(cond->distParams.pValue == 0) + return false; + + return true; + } + + return true; +} diff --git a/src/Amalgam/entity/EntityQueryCaches.h b/src/Amalgam/entity/EntityQueryCaches.h new file mode 100644 index 00000000..c924a720 --- /dev/null +++ b/src/Amalgam/entity/EntityQueryCaches.h @@ -0,0 +1,186 @@ +#pragma once + +//project headers: +#include "Conviction.h" +#include "Entity.h" +#include "HashMaps.h" +#include "IntegerSet.h" +#include "KnnCache.h" +#include "SeparableBoxFilterDataStore.h" +#include "StringInternPool.h" +#include "WeightedDiscreteRandomStream.h" + +//system headers: +#include +#include +#include + +//stores all of the types of caches needed for queries on a particular entity +class EntityQueryCaches +{ +public: + + EntityQueryCaches(Entity *_container) : container(_container) + { } + + //adds the entity to the cache + // container should 
contain entity + // entity_index is the index that the entity should be stored as + inline void AddEntity(Entity *e, size_t entity_index, bool batch_add = false) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //don't lock if batch_call is set + Concurrency::WriteLock write_lock(mutex, std::defer_lock); + if(!batch_add) + write_lock.lock(); + #endif + + sbfds.AddEntity(e, entity_index); + } + + //like AddEntity, but removes the entity from the cache and reassigns entity_index_to_reassign to use the old + // entity_index; for example, if entity_index 3 is being removed and 5 is the highest index, if entity_index_to_reassign is 5, + // then this function will move the entity data that was previously in index 5 to be referenced by index 3 for all caches + inline void RemoveEntity(Entity *e, size_t entity_index, size_t entity_index_to_reassign, bool batch_remove = false) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //don't lock if batch_call is set + Concurrency::WriteLock write_lock(mutex, std::defer_lock); + if(!batch_remove) + write_lock.lock(); + #endif + + sbfds.RemoveEntity(e, entity_index, entity_index_to_reassign); + } + + //updates all of the label values for entity e with index entity_index + inline void UpdateAllEntityLabels(Entity *entity, size_t entity_index) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::WriteLock write_lock(mutex); + #endif + + sbfds.UpdateAllEntityLabels(entity, entity_index); + } + + //like UpdateAllEntityLabels, but only updates labels for the keys of labels_updated + inline void UpdateEntityLabels(Entity *entity, size_t entity_index, EvaluableNode::AssocType &labels_updated) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::WriteLock write_lock(mutex); + #endif + + for(auto &[label_id, _] : labels_updated) + sbfds.UpdateEntityLabel(entity, entity_index, label_id); + } + + //like UpdateEntityLabels, but only updates labels for the keys of labels_updated that are not in labels_previous + // or where the value has changed + inline void UpdateEntityLabelsAddedOrChanged(Entity *entity, size_t entity_index, + EvaluableNode::AssocType &labels_previous, EvaluableNode::AssocType &labels_updated) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::WriteLock write_lock(mutex); + #endif + + for(auto &[label_id, label] : labels_updated) + { + auto prev_entry = labels_previous.find(label_id); + + //if not found or different, need to update the label + if(prev_entry == end(labels_previous) || prev_entry->second != label) + sbfds.UpdateEntityLabel(entity, entity_index, label_id); + } + } + + //like UpdateAllEntityLabels, but only updates labels for label_updated + inline void UpdateEntityLabel(Entity *entity, size_t entity_index, StringInternPool::StringID label_updated) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::WriteLock write_lock(mutex); + #endif + + sbfds.UpdateEntityLabel(entity, entity_index, label_updated); + } + + //specifies that this cache can be used for the input condition + static bool DoesCachedConditionMatch(EntityQueryCondition *cond, bool last_condition); + + //returns true if the cache already has the label specified + inline bool DoesHaveLabel(StringInternPool::StringID label_id) + { + return sbfds.DoesHaveLabel(label_id); + } + + //makes sure any labels needed for cond are in the cache +#if defined(MULTITHREAD_SUPPORT) || 
defined(MULTITHREAD_INTERFACE) + void EnsureLabelsAreCached(EntityQueryCondition *cond, Concurrency::ReadLock &lock); +#else + void EnsureLabelsAreCached(EntityQueryCondition *cond); +#endif + + //returns the set matching_entities of entity ids in the cache that match the provided query condition cond, will fill compute_results with numeric results if KNN query + //if is_first is true, optimizes to skip unioning results with matching_entities (just overwrites instead). + void GetMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, std::vector> &compute_results, bool is_first, bool update_matching_entities); + + //like GetMatchingEntities, but returns a string id + bool ComputeValueFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, StringInternPool::StringID &compute_result, bool is_first); + + //like GetMatchingEntities, but returns a flat_hash_map of numbers to numbers + void ComputeValuesFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, FastHashMap, DoubleNanHashComparator> &compute_results, bool is_first); + + //like GetMatchingEntities, but returns a flat_hash_map of string ids to numbers + //returns true if value was computed, false if not valid + void ComputeValuesFromMatchingEntities(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, FastHashMap &compute_results, bool is_first); + + //like GetMatchingEntities, but returns entity_indices_sampled + void GetMatchingEntitiesViaSamplingWithReplacement(EntityQueryCondition *cond, BitArrayIntegerSet &matching_entities, std::vector &entity_indices_sampled, bool is_first, bool update_matching_entities); + + //the container this is a cache for + Entity *container; + + SeparableBoxFilterDataStore sbfds; + + //buffers to be reused for less memory churn + struct QueryCachesBuffers + { + //for storting compute results + std::vector> computeResultsIdToValue; + + //buffer to keep track of which entities are currently matching + BitArrayIntegerSet currentMatchingEntities; + + //temporary buffer when needed to perform set operations with currentMatchingEntities + BitArrayIntegerSet tempMatchingEntityIndices; + + //buffer for entity indices + std::vector entityIndices; + + //buffer for sampled entity indices with replacement / duplicates + std::vector entityIndicesWithDuplicates; + + //buffer for doubles + std::vector doubleVector; + + //buffer for doubles pairs + std::vector> pairDoubleVector; + + //nearest neighbors cache + KnnNonZeroDistanceQuerySBFCache knnCache; + + //for conviction calculations + ConvictionProcessor::ConvictionProcessorBuffers convictionBuffers; + }; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //mutex for operations that may edit or modify the query cache + Concurrency::ReadWriteMutex mutex; +#endif + + //for multithreading, there should be one of these per thread +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + thread_local +#endif + //buffers that can be used for less memory churn (per-thread if multithreaded) + static QueryCachesBuffers buffers; +}; diff --git a/src/Amalgam/entity/EntityQueryManager.h b/src/Amalgam/entity/EntityQueryManager.h new file mode 100644 index 00000000..44caac4e --- /dev/null +++ b/src/Amalgam/entity/EntityQueryManager.h @@ -0,0 +1,209 @@ +#pragma once + +//project headers: +#include "EntityQueryCaches.h" +#include "IntegerSet.h" + +//system headers: +#include + +class EntityQueryManager +{ +public: + + //searches container for 
contained entities matching query. + // if return_query_value is false, then returns a list of all IDs of matching contained entities + // if return_query_value is true, then returns whatever the appropriate structure is for the query type for the final query + static EvaluableNodeReference GetEntitiesMatchingQuery(Entity *container, std::vector &conditions, EvaluableNodeManager *enm, bool return_query_value); + + //returns the collection of entities (and optionally associated compute values) that satisfy the specified chain of query conditions + // uses efficient querying methods with a query database, one database per container + static EvaluableNodeReference GetMatchingEntitiesFromQueryCaches(Entity *container, std::vector &conditions, EvaluableNodeManager *enm, bool return_query_value); + + //returns the numeric query cache associated with the specified container, creates one if one does not already exist + static EntityQueryCaches *GetQueryCachesForContainer(Entity *container); + + //updates when entity contents have changed + // container should contain entity + // entity_index is the index that the entity should be stored as + inline static void UpdateAllEntityLabels(Entity *container, Entity *entity, size_t entity_index) + { + if(entity == nullptr || container == nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + found_cache->second->UpdateAllEntityLabels(entity, entity_index); + } + + //like UpdateAllEntityLabels, but only updates labels for the keys of labels_updated + inline static void UpdateEntityLabels(Entity *container, Entity *entity, size_t entity_index, + EvaluableNode::AssocType &labels_updated) + { + if(entity == nullptr || container == nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + found_cache->second->UpdateEntityLabels(entity, entity_index, labels_updated); + } + + //like UpdateEntityLabels, but only updates labels for the keys of labels_updated that are not in labels_previous + // or where the value has changed + inline static void UpdateEntityLabelsAddedOrChanged(Entity *container, Entity *entity, size_t entity_index, + EvaluableNode::AssocType &labels_previous, EvaluableNode::AssocType &labels_updated) + { + if(entity == nullptr || container == nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + found_cache->second->UpdateEntityLabelsAddedOrChanged(entity, entity_index, labels_previous, labels_updated); + } + + //like UpdateAllEntityLabels, but only updates labels for label_updated + inline static void UpdateEntityLabel(Entity *container, Entity *entity, size_t entity_index, + StringInternPool::StringID label_updated) + { + if(entity == nullptr || container == nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + found_cache->second->UpdateEntityLabel(entity, entity_index, label_updated); + } + + //like UpdateEntityLabels, but adds the entity to the cache + inline static void AddEntity(Entity *container, Entity *entity, size_t entity_index) + { + if(entity == nullptr || container == 
nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + found_cache->second->AddEntity(entity, entity_index); + } + + //like UpdateEntityLabels, but removes the entity from the cache and reassigns entity_index_to_reassign to use the old + // entity_index; for example, if entity_index 3 is being removed and 5 is the highest index, if entity_index_to_reassign is 5, + // then this function will move the entity data that was previously in index 5 to be referenced by index 3 for all caches + inline static void RemoveEntity(Entity *container, Entity *entity, size_t entity_index, size_t entity_index_to_reassign) + { + if(entity == nullptr || container == nullptr) + return; + + #ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock(queryCacheMutex); + #endif + + auto found_cache = queryCaches.find(container); + if(found_cache != end(queryCaches)) + { + found_cache->second->RemoveEntity(entity, entity_index, entity_index_to_reassign); + queryCaches.erase(entity); + } + } + + //sorts the entities by their string ids + inline static void SortEntitiesByID(std::vector &entities) + { + //for performance reasons, it may be worth considering other data structures if sort ever becomes or remains significant + std::sort(begin(entities), end(entities), + [](Entity *a, Entity *b) + { + const std::string a_id = a->GetId(); + const std::string b_id = b->GetId(); + + int comp = StringNaturalCompare(a_id, b_id); + return comp < 0; + }); + } + + //converts a set of DistanceReferencePair into the appropriate EvaluableNode structure + template + static inline EvaluableNodeReference ConvertResultsToEvaluableNodes( + std::vector> &results, + EvaluableNodeManager *enm, bool as_sorted_list, StringInternPool::StringID additional_sorted_list_label, + GetEntityFunction get_entity) + { + if(as_sorted_list) + { + //build list of results + EvaluableNode *query_return = enm->AllocNode(ENT_LIST); + auto &qr_ocn = query_return->GetOrderedChildNodesReference(); + qr_ocn.resize(additional_sorted_list_label == string_intern_pool.NOT_A_STRING_ID ? 
2 : 3); + + qr_ocn[0] = CreateListOfStringsIdsFromIteratorAndFunction(results, enm, + [get_entity](auto &drp) { return get_entity(drp.reference)->GetIdStringId(); }); + qr_ocn[1] = CreateListOfNumbersFromIteratorAndFunction(results, enm, [](auto drp) { return drp.distance; }); + + //if adding on a label, retrieve the values from the entities + if(additional_sorted_list_label != string_intern_pool.NOT_A_STRING_ID) + { + //make a copy of the value at additionalSortedListLabel for each entity + EvaluableNode *list_of_values = enm->AllocNode(ENT_LIST); + qr_ocn[2] = list_of_values; + auto &list_ocn = list_of_values->GetOrderedChildNodes(); + list_ocn.resize(results.size()); + for(size_t i = 0; i < results.size(); i++) + { + Entity *entity = get_entity(results[i].reference); + list_ocn[i] = entity->GetValueAtLabel(additional_sorted_list_label, enm, false); + + //update cycle checks and idempotency + if(list_ocn[i] != nullptr) + { + if(list_ocn[i]->GetNeedCycleCheck()) + query_return->SetNeedCycleCheck(true); + + if(!list_ocn[i]->GetIsIdempotent()) + query_return->SetIsIdempotent(false); + } + } + } + + return EvaluableNodeReference(query_return, true); + } + else //return as assoc + { + return CreateAssocOfNumbersFromIteratorAndFunctions(results, + [get_entity](auto &drp) { return get_entity(drp.reference)->GetIdStringId(); }, + [](auto &drp) { return drp.distance; }, + enm + ); + } + } + +protected: + +#ifdef MULTITHREAD_SUPPORT + //mutex for operations that may edit or modify the entity's properties and attributes + static Concurrency::ReadWriteMutex queryCacheMutex; +#endif + + //set of caches for numeric queries + static FastHashMap> queryCaches; + + //maximum number of entities which to apply a brute force search (not building up caches, etc.) + static size_t maxEntitiesBruteForceSearch; +}; diff --git a/src/Amalgam/entity/EntityWriteListener.cpp b/src/Amalgam/entity/EntityWriteListener.cpp new file mode 100644 index 00000000..5543bef9 --- /dev/null +++ b/src/Amalgam/entity/EntityWriteListener.cpp @@ -0,0 +1,199 @@ +//project headers: +#include "EntityWriteListener.h" +#include "EvaluableNodeTreeFunctions.h" + +EntityWriteListener::EntityWriteListener(Entity *listening_entity, bool retain_writes, const std::string &filename) +{ + listeningEntity = listening_entity; + + if(retain_writes) + storedWrites = listenerStorage.AllocNode(ENT_SEQUENCE); + else + storedWrites = nullptr; + + if(filename != "") + { + logFile.open(filename, std::ios::binary); + logFile << "(" << GetStringFromEvaluableNodeType(ENT_SEQUENCE) << "\r\n"; + } +} + +EntityWriteListener::~EntityWriteListener() +{ + if(logFile.is_open()) + { + logFile << ")" << "\r\n"; + logFile.close(); + } +} + +void EntityWriteListener::LogSystemCall(EvaluableNode *params) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_sys_call = listenerStorage.AllocNode(ENT_SYSTEM); + new_sys_call->AppendOrderedChildNode(listenerStorage.DeepAllocCopy(params)); + + LogNewEntry(new_sys_call); +} + +void EntityWriteListener::LogPrint(std::string &print_string) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_print = listenerStorage.AllocNode(ENT_PRINT); + new_print->AppendOrderedChildNode(listenerStorage.AllocNode(ENT_STRING, print_string)); + + // don't flush because printing is handled in a bulk loop, the interpreter will manually flush afterwards + LogNewEntry(new_print, false); +} + +void EntityWriteListener::LogWriteValueToEntity(Entity *entity, 
EvaluableNode *value, const StringInternPool::StringID label_name, bool direct_set) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_write = BuildNewWriteOperation(direct_set ? ENT_DIRECT_ASSIGN_TO_ENTITIES : ENT_ASSIGN_TO_ENTITIES, entity); + + EvaluableNode *assoc = listenerStorage.AllocNode(ENT_ASSOC); + new_write->AppendOrderedChildNode(assoc); + + assoc->AppendOrderedChildNode(listenerStorage.AllocNode(ENT_STRING, label_name)); + assoc->AppendOrderedChildNode(listenerStorage.DeepAllocCopy(value, direct_set ? EvaluableNodeManager::ENMM_NO_CHANGE : EvaluableNodeManager::ENMM_REMOVE_ALL)); + + LogNewEntry(new_write); +} + +void EntityWriteListener::LogWriteValuesToEntity(Entity *entity, EvaluableNode *label_value_pairs, bool direct_set) +{ + //can only work with assoc arrays + if(!EvaluableNode::IsAssociativeArray(label_value_pairs)) + return; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_write = BuildNewWriteOperation(direct_set ? ENT_DIRECT_ASSIGN_TO_ENTITIES : ENT_ASSIGN_TO_ENTITIES, entity); + + EvaluableNode *assoc = listenerStorage.DeepAllocCopy(label_value_pairs, direct_set ? EvaluableNodeManager::ENMM_NO_CHANGE : EvaluableNodeManager::ENMM_REMOVE_ALL); + //just in case this node has a label left over, remove it + if(!direct_set) + assoc->ClearLabels(); + + new_write->AppendOrderedChildNode(assoc); + + LogNewEntry(new_write); +} + +void EntityWriteListener::LogWriteToEntity(Entity *entity, const std::string &new_code) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_write = BuildNewWriteOperation(ENT_ASSIGN_ENTITY_ROOTS, entity); + + new_write->AppendOrderedChildNode(listenerStorage.AllocNode(ENT_STRING, new_code)); + + LogNewEntry(new_write); +} + +void EntityWriteListener::LogCreateEntity(Entity *new_entity) +{ + if(new_entity == nullptr) + return; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + LogCreateEntityRecurse(new_entity); +} + +void EntityWriteListener::LogDestroyEntity(Entity *destroyed_entity) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_destroy = BuildNewWriteOperation(ENT_DESTROY_ENTITIES, destroyed_entity); + + LogNewEntry(new_destroy); +} + +void EntityWriteListener::LogSetEntityRandomSeed(Entity *entity, const std::string &rand_seed, bool deep_set) +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + EvaluableNode *new_set = BuildNewWriteOperation(ENT_SET_ENTITY_RAND_SEED, entity); + + new_set->AppendOrderedChildNode(listenerStorage.AllocNode(ENT_STRING, rand_seed)); + + if(!deep_set) + new_set->AppendOrderedChildNode(listenerStorage.AllocNode(ENT_FALSE)); + + LogNewEntry(new_set); +} + +void EntityWriteListener::FlushLogFile() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::SingleLock lock(mutex); +#endif + + if(logFile.is_open() && logFile.good()) + logFile.flush(); +} + +EvaluableNode *EntityWriteListener::BuildNewWriteOperation(EvaluableNodeType assign_type, Entity *target_entity) +{ + //create this code: + // (direct_assign_to_entity *id list* (assoc *label name* *value*)) + EvaluableNode *new_write = listenerStorage.AllocNode(assign_type); + + if(target_entity != listeningEntity) + { + EvaluableNode *id_list = GetTraversalIDPathListFromAToB(&listenerStorage, listeningEntity, target_entity); + new_write->AppendOrderedChildNode(id_list); + } + + return new_write; +} + +void 
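+//For context on the log format: the constructor opens the file with "(" plus the seq opcode, each
+// logged entry is unparsed onto its own line, and the destructor closes the sequence with ")". Based
+// on the opcode layout noted in BuildNewWriteOperation, a write of label "x" = 3 on a contained
+// entity with id "child" might unparse roughly as (ids and values purely illustrative):
+//
+//	(assign_to_entities (list "child") (assoc "x" 3))
+//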
EntityWriteListener::LogCreateEntityRecurse(Entity *new_entity) +{ + EvaluableNode *new_create = BuildNewWriteOperation(ENT_CREATE_ENTITIES, new_entity); + + EvaluableNodeReference new_entity_root_copy = new_entity->GetRoot(&listenerStorage); + new_create->AppendOrderedChildNode(new_entity_root_copy); + + LogNewEntry(new_create); + + //log any nested created entities + for(auto entity : new_entity->GetContainedEntities()) + LogCreateEntityRecurse(entity); +} + +void EntityWriteListener::LogNewEntry(EvaluableNode *new_entry, bool flush) +{ + if(logFile.is_open() && logFile.good()) + { + //one extra indentation because already have the sequence + logFile << Parser::Unparse(new_entry, &listenerStorage, false) << "\r\n"; + if(flush) + logFile.flush(); + } + + if(storedWrites == nullptr) + listenerStorage.FreeAllNodes(); + else + storedWrites->AppendOrderedChildNode(new_entry); +} diff --git a/src/Amalgam/entity/EntityWriteListener.h b/src/Amalgam/entity/EntityWriteListener.h new file mode 100644 index 00000000..9f27dbd1 --- /dev/null +++ b/src/Amalgam/entity/EntityWriteListener.h @@ -0,0 +1,66 @@ +#pragma once + +//project headers: +#include "Entity.h" + +//system headers: +#include + +class EntityWriteListener +{ +public: + //stores all writes to entities as a seq of direct_assigns + //listening_entity is the entity to store the relative ids to + //if retain_writes is true, then the listener will store the writes, and GetWrites() will return the list of all writes accumulated + //if filename is not empty, then it will attempt to open the file and log all writes to that file, and then flush the filestream + EntityWriteListener(Entity *listening_entity, bool retain_writes = false, const std::string &filename = std::string()); + + ~EntityWriteListener(); + + void LogSystemCall(EvaluableNode *params); + + // LogPrint does not flush to allow bulk processing + void LogPrint(std::string &print_string); + + void LogWriteValueToEntity(Entity *entity, EvaluableNode *value, const StringInternPool::StringID label_name, bool direct_set); + + //like LogWriteValueToEntity but where the keys are the labels and the values correspond in the assoc specified by label_value_pairs + void LogWriteValuesToEntity(Entity *entity, EvaluableNode *label_value_pairs, bool direct_set); + + void LogWriteToEntity(Entity *entity, const std::string &new_code); + + void LogCreateEntity(Entity *new_entity); + + void LogDestroyEntity(Entity *destroyed_entity); + + void LogSetEntityRandomSeed(Entity *entity, const std::string &rand_seed, bool deep_set); + + void FlushLogFile(); + + //returns all writes that the listener was aware of + constexpr EvaluableNode *GetWrites() + { + return storedWrites; + } + +protected: + //builds an assignment opcode for target_entity + EvaluableNode *BuildNewWriteOperation(EvaluableNodeType assign_type, Entity *target_entity); + + void LogCreateEntityRecurse(Entity *new_entity); + + //performs the write of the entry + void LogNewEntry(EvaluableNode *new_entry, bool flush = true); + + Entity *listeningEntity; + + EvaluableNodeManager listenerStorage; + + EvaluableNode *storedWrites; + std::ofstream logFile; + +#ifdef MULTITHREAD_SUPPORT + //mutex for writing to make sure everything is written in the same order + Concurrency::SingleMutex mutex; +#endif +}; diff --git a/src/Amalgam/evaluablenode/EvaluableNode.cpp b/src/Amalgam/evaluablenode/EvaluableNode.cpp new file mode 100644 index 00000000..4c6d1dc7 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNode.cpp @@ -0,0 +1,1899 @@ +//project 
headers: +#include "EvaluableNode.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeManagement.h" +#include "FastMath.h" +#include "StringInternPool.h" + +//system headers: +#include +#include +#include + +void EvaluableNode::GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels) +{ + num_common_labels = 0; + num_unique_labels = 0; + size_t num_n1_labels = 0; + size_t num_n2_labels = 0; + + if(n1 != nullptr) + num_n1_labels = n1->GetNumLabels(); + + if(n2 != nullptr) + num_n2_labels = n2->GetNumLabels(); + + //if no labels in either, then done + if(num_n1_labels == 0 && num_n2_labels == 0) + return; + + //if labels in one (but not both, because would have exited), then count total and done + if(num_n1_labels == 0 || num_n2_labels == 0) + { + num_unique_labels = std::max(num_n1_labels, num_n2_labels); + return; + } + + //if only have one label in each, compare immediately for speed + if(num_n1_labels == 1 && num_n2_labels == 1) + { + if(n1->GetLabel(0) == n2->GetLabel(0)) + num_common_labels = 1; + return; + } + + //compare + for(auto s_id : n1->GetLabelsStringIds()) + { + auto n2_label_sids = n2->GetLabelsStringIds(); + if(std::find(begin(n2_label_sids), end(n2_label_sids), s_id) != end(n2_label_sids)) + num_common_labels++; + } + + num_unique_labels = num_n1_labels + num_n2_labels - num_common_labels; //don't double-count the common labels +} + +bool EvaluableNode::AreShallowEqual(EvaluableNode *a, EvaluableNode *b) +{ + //check if one is null, then make sure both are null + bool a_is_null = EvaluableNode::IsNull(a); + bool b_is_null = EvaluableNode::IsNull(b); + if(a_is_null || b_is_null) + { + if(a_is_null == b_is_null) + return true; + + //one is null and the other isn't + return false; + } + + EvaluableNodeType a_type = a->GetType(); + EvaluableNodeType b_type = b->GetType(); + + //check both types are the same + if(a_type != b_type) + return false; + + //since both types are the same, only need to check one for the type of data + //check string equality + if(DoesEvaluableNodeTypeUseStringData(a_type)) + return a->GetStringIDReference() == b->GetStringIDReference(); + + //check numeric equality + if(DoesEvaluableNodeTypeUseNumberData(a_type)) + { + double av = EvaluableNode::ToNumber(a); + double bv = EvaluableNode::ToNumber(b); + return EqualIncludingNaN(av, bv); + } + + //if made it here, then it's an instruction, and they're of equal type + return true; +} + +bool EvaluableNode::IsTrue(EvaluableNode *n) +{ + if(n == nullptr) + return false; + + EvaluableNodeType node_type = n->GetType(); + if(node_type == ENT_TRUE) + return true; + if(node_type == ENT_FALSE) + return false; + if(node_type == ENT_NULL) + return false; + + if(DoesEvaluableNodeTypeUseNumberData(node_type)) + { + double &num = n->GetNumberValueReference(); + if(num == 0.0) + return false; + if(FastIsNaN(num)) + return false; + return true; + } + + if(DoesEvaluableNodeTypeUseStringData(node_type)) + { + if(n->GetStringIDReference() <= StringInternPool::EMPTY_STRING_ID) + return false; + return true; + } + + return true; +} + +EvaluableNode *EvaluableNode::RetrieveImmediateAssocValue(EvaluableNode *n, const std::string &key) +{ + if(!IsAssociativeArray(n)) + return nullptr; + + StringInternPool::StringID key_sid = string_intern_pool.GetIDFromString(key); + if(key_sid == StringInternPool::NOT_A_STRING_ID) + return nullptr; + + //first try for mapped + if(n->IsAssociativeArray()) + { + auto &mcn = n->GetMappedChildNodesReference(); + 
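+		//An assoc-like node can store its children in one of two ways: as true mapped child nodes
+		// (a hash map keyed by string id, searched here), or as ordered child nodes laid out as
+		// alternating key/value pairs, which the fallback loop below scans at indices 0, 2, 4, ...
+		// For example, a hypothetical (assoc "a" 1) kept in ordered form would hold ["a", 1] and the
+		// value for key "a" would be the node at index 1.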
auto result_in_mapped = mcn.find(key_sid); + if(result_in_mapped != end(mcn)) + return result_in_mapped->second; + + //not found + return nullptr; + } + + //try for uninterpreted, every other value is a key, so skip values and make sure have room for the last key + auto &ocn = n->GetOrderedChildNodes(); + for(size_t i = 0; i + 1 < ocn.size(); i += 2) + { + EvaluableNode *key_node = ocn[i]; + if(key_node == nullptr) + continue; + if(key_node->GetType() != ENT_STRING) + continue; + if(key_node->GetStringValue() == key) + return ocn[i + 1]; + } + + return nullptr; +} + +int EvaluableNode::Compare(EvaluableNode *a, EvaluableNode *b) +{ + //try numerical comparison first + if(CanRepresentValueAsANumber(a) && CanRepresentValueAsANumber(b)) + { + double n_a = EvaluableNode::ToNumber(a); + double n_b = EvaluableNode::ToNumber(b); + + bool a_nan = FastIsNaN(n_a); + bool b_nan = FastIsNaN(n_b); + if(a_nan && b_nan) + return 0; + if(a_nan) + return -1; + if(b_nan) + return 1; + + if(n_a < n_b) + return -1; + else if(n_b < n_a) + return 1; + else + return 0; + } + + //compare via strings + //first check if they're the same + if(a != nullptr && b != nullptr) + { + if(DoesEvaluableNodeTypeUseStringData(a->GetType()) && DoesEvaluableNodeTypeUseStringData(b->GetType()) + && a->GetStringIDReference() == b->GetStringIDReference()) + return 0; + } + + std::string a_str = EvaluableNode::ToString(a); + std::string b_str = EvaluableNode::ToString(b); + return StringNaturalCompare(a_str, b_str); +} + +double EvaluableNode::ToNumber(EvaluableNode *e, double value_if_null) +{ + if(e == nullptr) + return value_if_null; + + switch(e->GetType()) + { + case ENT_TRUE: + return 1; + case ENT_FALSE: + return 0; + case ENT_NULL: + return value_if_null; + case ENT_NUMBER: + return e->GetNumberValueReference(); + case ENT_STRING: + case ENT_SYMBOL: + { + auto sid = e->GetStringIDReference(); + if(sid == string_intern_pool.NOT_A_STRING_ID) + return value_if_null; + const auto &str = string_intern_pool.GetStringFromID(sid); + auto [value, success] = Platform_StringToNumber(str); + if(success) + return value; + return value_if_null; + } + default: + return static_cast(e->GetNumChildNodes()); + } +} + +const std::string EvaluableNode::ToString(EvaluableNode *e) +{ + if(e == nullptr) + return "null"; + + switch(e->GetType()) + { + case ENT_NUMBER: + { + double value = e->GetNumberValueReference(); + if(FastIsNaN(value)) + return ".nan"; + if(std::isinf(value)) + { + if(value > 0.0) + return ".infinity"; + else + return "-.infinity"; + } + return NumberToString(value); + } + case ENT_STRING: + case ENT_SYMBOL: + return e->GetStringValue(); + default: + return GetStringFromEvaluableNodeType(e->GetType()); + } +} + +StringInternPool::StringID EvaluableNode::ToStringIDIfExists(EvaluableNode *e) +{ + if(IsEmptyNode(e)) + return StringInternPool::NOT_A_STRING_ID; + + if((e->GetType() == ENT_STRING || e->GetType() == ENT_SYMBOL)) + return e->GetStringIDReference(); + + //see if the string exists even if it is not stored as a StringID + const std::string str_value = ToString(e); + //will return empty string if not found + return string_intern_pool.GetIDFromString(str_value); +} + +StringInternPool::StringID EvaluableNode::ToStringIDWithReference(EvaluableNode *e) +{ + //NaS doesn't need a reference + if(IsEmptyNode(e)) + return StringInternPool::NOT_A_STRING_ID; + + if(e->GetType() == ENT_STRING || e->GetType() == ENT_SYMBOL) + return string_intern_pool.CreateStringReference(e->GetStringIDReference()); + + std::string stringified = 
ToString(e); + return string_intern_pool.CreateStringReference(stringified); +} + +StringInternPool::StringID EvaluableNode::ToStringIDTakingReferenceAndClearing(EvaluableNode *e) +{ + //NaS doesn't need a reference + if(IsEmptyNode(e)) + return StringInternPool::NOT_A_STRING_ID; + + if(e->GetType() == ENT_STRING || e->GetType() == ENT_SYMBOL) + { + //clear the reference and return it + StringInternPool::StringID &sid_reference = e->GetStringIDReference(); + StringInternPool::StringID sid_to_return = string_intern_pool.NOT_A_STRING_ID; + std::swap(sid_reference, sid_to_return); + return sid_to_return; + } + + std::string stringified = ToString(e); + return string_intern_pool.CreateStringReference(stringified); +} + +void EvaluableNode::ConvertOrderedListToNumberedAssoc() +{ + //don't do anything if no child nodes + if(!DoesEvaluableNodeTypeUseOrderedData(GetType())) + { + InitMappedChildNodes(); + type = ENT_ASSOC; + return; + } + + AssocType new_map; + + //convert ordered child nodes into index number -> value + auto &ocn = GetOrderedChildNodes(); + new_map.reserve(ocn.size()); + for(size_t i = 0; i < ocn.size(); i++) + new_map[string_intern_pool.CreateStringReference(NumberToString(i))] = ocn[i]; + + InitMappedChildNodes(); + type = ENT_ASSOC; + + //swap for efficiency + std::swap(GetMappedChildNodesReference(), new_map); +} + +size_t EvaluableNode::GetEstimatedNodeSizeInBytes(EvaluableNode *n) +{ + if(n == nullptr) + return 0; + + size_t total_size = 0; + total_size += sizeof(EvaluableNode); + if(n->HasExtendedValue()) + total_size += sizeof(EvaluableNode::EvaluableNodeExtendedValue); + total_size += n->GetNumLabels() * sizeof(StringInternPool::StringID); + + total_size += n->GetOrderedChildNodes().capacity() * sizeof(EvaluableNode *); + total_size += n->GetMappedChildNodes().size() * (sizeof(StringInternPool::StringID) + sizeof(EvaluableNode *)); + + return total_size; +} + +void EvaluableNode::InitializeType(EvaluableNode *n, bool copy_labels, bool copy_comments_and_concurrency) +{ + attributes.allAttributes = 0; + if(n == nullptr) + { + type = ENT_NULL; + value.ConstructOrderedChildNodes(); + return; + } + + type = n->GetType(); + + if(DoesEvaluableNodeTypeUseAssocData(type)) + { + value.ConstructMappedChildNodes(); + value.mappedChildNodes = n->GetMappedChildNodesReference(); + string_intern_pool.CreateStringReferences(value.mappedChildNodes, [](auto n) { return n.first; }); + + //update idempotency + SetIsIdempotent(true); + for(auto &[_, cn] : value.mappedChildNodes) + { + if(cn != nullptr && !cn->GetIsIdempotent()) + { + SetIsIdempotent(false); + break; + } + } + } + else if(DoesEvaluableNodeTypeUseNumberData(type)) + { + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + value.numberValueContainer.numberValue = n->GetNumberValueReference(); + SetIsIdempotent(true); + } + else if(DoesEvaluableNodeTypeUseStringData(type)) + { + value.stringValueContainer.stringID = string_intern_pool.CreateStringReference(n->GetStringIDReference()); + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + SetIsIdempotent(type == ENT_STRING); + } + else //ordered + { + value.ConstructOrderedChildNodes(); + value.orderedChildNodes = n->GetOrderedChildNodesReference(); + + //update idempotency + if(IsEvaluableNodeTypePotentiallyIdempotent(type)) + { + SetIsIdempotent(true); + for(auto &cn : value.orderedChildNodes) + { + if(cn != nullptr && !cn->GetIsIdempotent()) + { + SetIsIdempotent(false); + break; + } + } + } + else + { + 
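+		//Idempotency propagation: a copied node stays idempotent only when its own type can be
+		// idempotent and every copied child is idempotent; types that always require evaluation are
+		// simply marked non-idempotent here. E.g., copying a (list 1 2) subtree can remain idempotent,
+		// whereas copying a (print ...) node cannot (examples illustrative).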
SetIsIdempotent(false); + } + } + + //child nodes were copied, so propagate whether cycle free + SetNeedCycleCheck(n->GetNeedCycleCheck()); + + if(copy_comments_and_concurrency) + SetConcurrency(n->GetConcurrency()); + + if(copy_labels || copy_comments_and_concurrency) + { + if(n->HasExtendedValue()) + { + EnsureEvaluableNodeExtended(); + if(copy_labels) + SetLabelsStringIds(n->GetLabelsStringIds()); + if(copy_comments_and_concurrency) + SetCommentsStringId(n->GetCommentsStringId()); + } + //copy_comments doesn't matter because if made it here, there aren't any + else if(copy_labels && HasCompactSingleLabelStorage()) + { + StringInternPool::StringID id = n->GetCompactSingleLabelStorage(); + if(id != StringInternPool::NOT_A_STRING_ID) + GetCompactSingleLabelStorage() = string_intern_pool.CreateStringReference(id); + } + } +} + +void EvaluableNode::CopyValueFrom(EvaluableNode *n) +{ + if(n == nullptr) + { + + ClearOrderedChildNodes(); + ClearMappedChildNodes(); + //doesn't need an EvaluableNodeManager because not converting child nodes from one type to another + SetType(ENT_NULL, nullptr, false); + return; + } + + auto cur_type = n->GetType(); + //doesn't need an EvaluableNodeManager because not converting child nodes from one type to another + SetType(cur_type, nullptr, false); + + if(DoesEvaluableNodeTypeUseAssocData(cur_type)) + { + auto &n_mcn = n->GetMappedChildNodesReference(); + if(n_mcn.empty()) + ClearMappedChildNodes(); + else + SetMappedChildNodes(n_mcn, true); + } + else if(DoesEvaluableNodeTypeUseNumberData(cur_type)) + GetNumberValueReference() = n->GetNumberValueReference(); + else if(DoesEvaluableNodeTypeUseStringData(cur_type)) + SetStringID(n->GetStringIDReference()); + else //ordered + { + auto &n_ocn = n->GetOrderedChildNodesReference(); + if(n_ocn.empty()) + ClearOrderedChildNodes(); + else + SetOrderedChildNodes(n_ocn); + } + + if(GetNumLabels() > 0) + SetIsIdempotent(false); + else + SetIsIdempotent(n->GetIsIdempotent()); +} + +void EvaluableNode::CopyMetadataFrom(EvaluableNode *n) +{ + //copy labels (different ways based on type) + if(HasCompactSingleLabelStorage() && n->HasCompactSingleLabelStorage()) + { + auto string_id = GetCompactSingleLabelStorage(); + auto n_string_id = n->GetCompactSingleLabelStorage(); + + if(string_id != n_string_id) + { + string_intern_pool.DestroyStringReference(string_id); + GetCompactSingleLabelStorage() = string_intern_pool.CreateStringReference(n_string_id); + SetIsIdempotent(false); + } + } + else + { + auto label_sids = n->GetLabelsStringIds(); + if(label_sids.size() > 0) + { + SetLabelsStringIds(label_sids); + SetIsIdempotent(false); + } + else + ClearLabels(); + } + + SetCommentsStringId(n->GetCommentsStringId()); + SetConcurrency(n->GetConcurrency()); +} + +void EvaluableNode::SetType(EvaluableNodeType new_type, EvaluableNodeManager *enm, + bool attempt_to_preserve_immediate_value) +{ + EvaluableNodeType cur_type = GetType(); + if(new_type == cur_type) + return; + + if( (DoesEvaluableNodeTypeUseNumberData(cur_type) && DoesEvaluableNodeTypeUseNumberData(new_type)) + || (DoesEvaluableNodeTypeUseStringData(cur_type) && DoesEvaluableNodeTypeUseStringData(new_type)) + || (DoesEvaluableNodeTypeUseAssocData(cur_type) && DoesEvaluableNodeTypeUseAssocData(new_type)) + || (DoesEvaluableNodeTypeUseOrderedData(cur_type) && DoesEvaluableNodeTypeUseOrderedData(new_type)) ) + { + type = new_type; + + //lose idempotency if the new type isn't + if(GetIsIdempotent() && !IsEvaluableNodeTypePotentiallyIdempotent(type)) + SetIsIdempotent(false); + + 
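+		//Fast path: when the old and new types share the same underlying storage category (number,
+		// string, assoc, or ordered), only the type tag needs to change, so the function returns here
+		// and skips the storage conversion below. E.g., ENT_STRING -> ENT_SYMBOL can keep the existing
+		// string id, whereas converting an ordered type to ENT_ASSOC must rebuild child storage (see
+		// the conversion further down).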
return; + } + + //need to preserve the extra label if it exists + StringInternPool::StringID extra_label = StringInternPool::NOT_A_STRING_ID; + if(HasCompactSingleLabelStorage()) + { + extra_label = GetCompactSingleLabelStorage(); + GetCompactSingleLabelStorage() = StringInternPool::NOT_A_STRING_ID; + } + + //transform as appropriate + if(DoesEvaluableNodeTypeUseNumberData(new_type)) + { + double number_value = 0.0; + if(attempt_to_preserve_immediate_value) + number_value = EvaluableNode::ToNumber(this); + + InitNumberValue(); + GetNumberValueReference() = number_value; + + //will check below if any reason to not be idempotent + SetIsIdempotent(true); + } + else if(DoesEvaluableNodeTypeUseStringData(new_type)) + { + StringInternPool::StringID sid = string_intern_pool.NOT_A_STRING_ID; + if(attempt_to_preserve_immediate_value) + sid = EvaluableNode::ToStringIDWithReference(this); + InitStringValue(); + GetStringIDReference() = sid; + + //will check below if any reason to not be idempotent + SetIsIdempotent(new_type == ENT_STRING); + } + else if(DoesEvaluableNodeTypeUseAssocData(new_type)) + { + if(DoesEvaluableNodeTypeUseOrderedData(cur_type)) + { + //convert ordered pairs to assoc + AssocType new_map; + + auto &ocn = GetOrderedChildNodesReference(); + new_map.reserve((ocn.size() + 1) / 2); + for(size_t i = 0; i < ocn.size(); i += 2) + { + auto sid = ToStringIDWithReference(ocn[i]); + + EvaluableNode *value = nullptr; + if(i + 1 < ocn.size()) + value = ocn[i + 1]; + + //try to insert, but drop reference if couldn't + if(!new_map.insert(std::make_pair(sid, value)).second) + string_intern_pool.DestroyStringReference(sid); + } + + //set up mapped nodes + InitMappedChildNodes(); + //swap for efficiency + std::swap(GetMappedChildNodesReference(), new_map); + } + else //just set up empty assoc + { + InitMappedChildNodes(); + } + } + else //ordered pairs + { + //will need a valid enm to convert this + if(DoesEvaluableNodeTypeUseAssocData(cur_type) && enm != nullptr) + { + std::vector new_ordered; + auto &mcn = GetMappedChildNodes(); + new_ordered.reserve(2 * mcn.size()); + for(auto &[cn_id, cn] : mcn) + { + //keep the reference from when it was an assoc + new_ordered.push_back(enm->AllocNodeWithReferenceHandoff(ENT_STRING, cn_id)); + new_ordered.push_back(cn); + } + + //clear the mapped nodes here, because don't want to free the references + // as they were handed off to the newly allocated ordered child nodes + mcn.clear(); + InitOrderedChildNodes(); + //swap for efficiency + swap(GetOrderedChildNodesReference(), new_ordered); + } + else //just set up empty ordered + { + InitOrderedChildNodes(); + } + } + + type = new_type; + + //cleared child nodes, so no cycles + SetNeedCycleCheck(false); + + //put the extra label back on if exists (already have the reference) + if(extra_label != StringInternPool::NOT_A_STRING_ID) + AppendLabelStringId(extra_label, true); + + //reset idempotency if applicable + // can only go one way with idempotency, because if it's not idempotent + if(GetNumLabels() == 0) + { + if(GetIsIdempotent()) + SetIsIdempotent(IsEvaluableNodeTypePotentiallyIdempotent(new_type)); + } + else + SetIsIdempotent(false); +} + +void EvaluableNode::InitNumberValue() +{ + if(type == ENT_NUMBER) + return; + + DestructValue(); + + SetNeedCycleCheck(false); + + if(HasExtendedValue()) + { + value.extension.extendedValue->value.numberValueContainer.numberValue = 0.0; + } + else + { + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + value.numberValueContainer.numberValue 
= 0.0; + } +} + +void EvaluableNode::InitStringValue() +{ + if(DoesEvaluableNodeTypeUseStringData(GetType())) + return; + + DestructValue(); + + SetNeedCycleCheck(false); + + if(HasExtendedValue()) + { + value.extension.extendedValue->value.stringValueContainer.stringID = StringInternPool::NOT_A_STRING_ID; + } + else + { + value.stringValueContainer.stringID = StringInternPool::NOT_A_STRING_ID; + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + } +} + +void EvaluableNode::SetStringID(StringInternPool::StringID id) +{ + if(DoesEvaluableNodeTypeUseStringData(GetType())) + { + if(!HasExtendedValue()) + { + StringInternPool::StringID cur_id = value.stringValueContainer.stringID; + if(id != cur_id) + { + string_intern_pool.DestroyStringReference(cur_id); + value.stringValueContainer.stringID = string_intern_pool.CreateStringReference(id); + } + } + else + { + StringInternPool::StringID cur_id = value.extension.extendedValue->value.stringValueContainer.stringID; + if(id != cur_id) + { + string_intern_pool.DestroyStringReference(cur_id); + value.extension.extendedValue->value.stringValueContainer.stringID = string_intern_pool.CreateStringReference(id); + } + } + } +} + +const std::string &EvaluableNode::GetStringValue() +{ + if(DoesEvaluableNodeTypeUseStringData(GetType())) + { + if(!HasExtendedValue()) + return string_intern_pool.GetStringFromID(value.stringValueContainer.stringID); + else + return string_intern_pool.GetStringFromID(value.extension.extendedValue->value.stringValueContainer.stringID); + } + + //none of the above, return an empty one + return emptyStringValue; +} + +//Note: this function is logically equivalent to SetStringValueID +// After string interning is implemented throughout, this should be revisited to see if these two functions should be combined. 
+void EvaluableNode::SetStringValue(const std::string &v) +{ + if(DoesEvaluableNodeTypeUseStringData(GetType())) + { + if(!HasExtendedValue()) + { + //create a new reference before destroying so don't accidentally destroy something that will then need to be recreated + auto new_id = string_intern_pool.CreateStringReference(v); + //destroy anything that was already in there + string_intern_pool.DestroyStringReference(value.stringValueContainer.stringID); + value.stringValueContainer.stringID = new_id; + } + else + { + //create a new reference before destroying so don't accidentally destroy something that will then need to be recreated + auto new_id = string_intern_pool.CreateStringReference(v); + //destroy anything that was already in there + string_intern_pool.DestroyStringReference(value.extension.extendedValue->value.stringValueContainer.stringID); + value.extension.extendedValue->value.stringValueContainer.stringID = new_id; + } + } +} + +StringInternPool::StringID EvaluableNode::GetAndClearStringIDWithReference() +{ + StringInternPool::StringID sid = StringInternPool::NOT_A_STRING_ID; + if(DoesEvaluableNodeTypeUseStringData(GetType())) + { + //retrieve id and just clear it, as the caller will take care of the reference + if(!HasExtendedValue()) + { + sid = value.stringValueContainer.stringID; + value.stringValueContainer.stringID = StringInternPool::NOT_A_STRING_ID; + } + else + { + sid = value.extension.extendedValue->value.stringValueContainer.stringID; + value.extension.extendedValue->value.stringValueContainer.stringID = StringInternPool::NOT_A_STRING_ID; + } + } + + return sid; +} + +void EvaluableNode::SetStringIDWithReferenceHandoff(StringInternPool::StringID id) +{ + if(DoesEvaluableNodeTypeUseStringData(GetType())) + { + if(!HasExtendedValue()) + { + StringInternPool::StringID cur_id = value.stringValueContainer.stringID; + string_intern_pool.DestroyStringReference(cur_id); + value.stringValueContainer.stringID = id; + } + else + { + StringInternPool::StringID cur_id = value.extension.extendedValue->value.stringValueContainer.stringID; + string_intern_pool.DestroyStringReference(cur_id); + value.extension.extendedValue->value.stringValueContainer.stringID = id; + } + } +} + +std::vector EvaluableNode::GetLabelsStringIds() +{ + if(!HasExtendedValue()) + { + if(HasCompactSingleLabelStorage()) + { + if(GetCompactSingleLabelStorage() == StringInternPool::NOT_A_STRING_ID) + return emptyStringIdVector; + + std::vector label_vec; + label_vec.push_back(GetCompactSingleLabelStorage()); + return label_vec; + } + + return emptyStringIdVector; + } + + return value.extension.extendedValue->labelsStringIds; +} + +std::vector EvaluableNode::GetLabelsStrings() +{ + if(!HasExtendedValue()) + { + if(HasCompactSingleLabelStorage()) + { + if(GetCompactSingleLabelStorage() == StringInternPool::NOT_A_STRING_ID) + return emptyStringVector; + + std::vector label_vec; + label_vec.push_back(GetLabel(0)); + return label_vec; + } + + return emptyStringVector; + } + + auto &sids = value.extension.extendedValue->labelsStringIds; + std::vector label_vec(sids.size()); + for(size_t i = 0; i < sids.size(); i++) + label_vec[i] = string_intern_pool.GetStringFromID(sids[i]); + + return label_vec; +} + +void EvaluableNode::SetLabelsStringIds(const std::vector &label_string_ids) +{ + if(label_string_ids.size() == 0) + { + ClearLabels(); + return; + } + + //can no longer be idempotent because it could be altered by something collecting labels + attributes.individualAttribs.isIdempotent = false; + + 
if(!HasExtendedValue()) + { + if(label_string_ids.size() == 1 && HasCompactSingleLabelStorage()) + { + StringInternPool::StringID cur_id = GetCompactSingleLabelStorage(); + if(label_string_ids[0] != cur_id) + { + string_intern_pool.DestroyStringReference(GetCompactSingleLabelStorage()); + GetCompactSingleLabelStorage() = string_intern_pool.CreateStringReference(label_string_ids[0]); + } + return; + } + + //doesn't have enough storage, so extend and set below + EnsureEvaluableNodeExtended(); + } + + //create new references before destroying old (so don't need to recreate strings if they are freed and then released + string_intern_pool.CreateStringReferences(label_string_ids); + + //clear references to anything existing + string_intern_pool.DestroyStringReferences(value.extension.extendedValue->labelsStringIds); + + value.extension.extendedValue->labelsStringIds = label_string_ids; +} + +size_t EvaluableNode::GetNumLabels() +{ + if(!HasExtendedValue()) + { + if(HasCompactSingleLabelStorage() && GetCompactSingleLabelStorage() != StringInternPool::NOT_A_STRING_ID) + return 1; + + return 0; + } + + auto &sids = value.extension.extendedValue->labelsStringIds; + return sids.size(); +} + +const std::string &EvaluableNode::GetLabel(size_t label_index) +{ + if(!HasExtendedValue()) + { + if(HasCompactSingleLabelStorage()) + { + if(label_index != 0) + return StringInternPool::EMPTY_STRING; + + return string_intern_pool.GetStringFromID(GetCompactSingleLabelStorage()); + } + + return StringInternPool::EMPTY_STRING; + } + + auto &sids = value.extension.extendedValue->labelsStringIds; + if(label_index >= sids.size()) + return StringInternPool::EMPTY_STRING; + else + return string_intern_pool.GetStringFromID(sids[label_index]); +} + +const StringInternPool::StringID EvaluableNode::GetLabelStringId(size_t label_index) +{ + if(!HasExtendedValue()) + { + if(HasCompactSingleLabelStorage()) + { + if(label_index != 0) + return StringInternPool::NOT_A_STRING_ID; + + return GetCompactSingleLabelStorage(); + } + + return StringInternPool::NOT_A_STRING_ID; + } + + auto &sids = value.extension.extendedValue->labelsStringIds; + if(label_index >= sids.size()) + return StringInternPool::NOT_A_STRING_ID; + else + return sids[label_index]; +} + +void EvaluableNode::RemoveLabel(size_t label_index) +{ + if(HasCompactSingleLabelStorage()) + { + if(label_index == 0) + { + string_intern_pool.DestroyStringReference(GetCompactSingleLabelStorage()); + GetCompactSingleLabelStorage() = StringInternPool::NOT_A_STRING_ID; + } + + return; + } + + if(!HasExtendedValue()) + return; + + if(label_index >= value.extension.extendedValue->labelsStringIds.size()) + return; + + string_intern_pool.DestroyStringReference(value.extension.extendedValue->labelsStringIds[label_index]); + value.extension.extendedValue->labelsStringIds.erase(begin(value.extension.extendedValue->labelsStringIds) + label_index); +} + +void EvaluableNode::ClearLabels() +{ + if(HasCompactSingleLabelStorage()) + { + string_intern_pool.DestroyStringReference(GetCompactSingleLabelStorage()); + GetCompactSingleLabelStorage() = StringInternPool::NOT_A_STRING_ID; + return; + } + + if(!HasExtendedValue()) + return; + + string_intern_pool.DestroyStringReferences(value.extension.extendedValue->labelsStringIds); + value.extension.extendedValue->labelsStringIds.clear(); +} + +void EvaluableNode::ReserveLabels(size_t num_labels) +{ + if(num_labels == 0) + return; + + //see if compact storage is good enough + if(HasCompactSingleLabelStorage() && num_labels <= 1) + return; + + 
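+	//more than one label (or a node without compact storage) needs the extended value's label vector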
if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + value.extension.extendedValue->labelsStringIds.reserve(num_labels); +} + +void EvaluableNode::AppendLabelStringId(StringInternPool::StringID label_string_id, bool handoff_reference) +{ + //can no longer be idempotent because it could be altered by something collecting labels + attributes.individualAttribs.isIdempotent = false; + + if(!handoff_reference) + string_intern_pool.CreateStringReference(label_string_id); + + if(HasCompactSingleLabelStorage() && GetCompactSingleLabelStorage() == StringInternPool::NOT_A_STRING_ID) + { + GetCompactSingleLabelStorage() = label_string_id; + return; + } + + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + value.extension.extendedValue->labelsStringIds.push_back(label_string_id); +} + +void EvaluableNode::AppendLabel(const std::string &label) +{ + //can no longer be idempotent because it could be altered by something collecting labels + attributes.individualAttribs.isIdempotent = false; + + if(HasCompactSingleLabelStorage() && GetCompactSingleLabelStorage() == StringInternPool::NOT_A_STRING_ID) + { + GetCompactSingleLabelStorage() = string_intern_pool.CreateStringReference(label); + return; + } + + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + value.extension.extendedValue->labelsStringIds.push_back(string_intern_pool.CreateStringReference(label)); +} + +StringInternPool::StringID EvaluableNode::GetCommentsStringId() +{ + if(!HasExtendedValue()) + return StringInternPool::NOT_A_STRING_ID; + + return value.extension.commentsStringId; +} + +std::vector EvaluableNode::GetCommentsSeparateLines() +{ + std::vector comment_lines; + + StringInternPool::StringID comment_sid = GetCommentsStringId(); + if(comment_sid <= StringInternPool::EMPTY_STRING_ID) + return comment_lines; + + const auto &full_comments = string_intern_pool.GetStringFromID(comment_sid); + + //early exit + if(full_comments == "") + return comment_lines; + + size_t cur = 0; + size_t prev = 0; + while((cur = full_comments.find('\n', prev)) != std::string::npos) + { + //skip carriage return if found prior to the newline + int carriage_return_offset = 0; + if(prev < cur && full_comments[cur - 1] == '\r') + carriage_return_offset = 1; + + comment_lines.push_back(full_comments.substr(prev, cur - prev - carriage_return_offset)); + prev = cur + 1; + } + + //get whatever is left + if(prev < full_comments.size()) + comment_lines.push_back(full_comments.substr(prev)); + + return comment_lines; +} + +void EvaluableNode::SetCommentsStringId(StringInternPool::StringID comments_string_id, bool handoff_reference) +{ + if(comments_string_id == StringInternPool::NOT_A_STRING_ID) + { + ClearComments(); + return; + } + + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + if(!handoff_reference) + string_intern_pool.CreateStringReference(comments_string_id); + + //clear references to anything existing + string_intern_pool.DestroyStringReference(value.extension.commentsStringId); + + value.extension.commentsStringId = comments_string_id; +} + +void EvaluableNode::SetComments(const std::string &comments) +{ + if(comments == "") + { + ClearComments(); + return; + } + + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + //create new references before destroying old (so don't need to recreate strings if they are freed and then released) + StringInternPool::StringID new_reference = string_intern_pool.CreateStringReference(comments); + + //clear references to anything existing + 
string_intern_pool.DestroyStringReference(value.extension.commentsStringId); + + value.extension.commentsStringId = new_reference; +} + +void EvaluableNode::ClearComments() +{ + if(!HasExtendedValue()) + return; + + string_intern_pool.DestroyStringReference(value.extension.commentsStringId); + + value.extension.commentsStringId = StringInternPool::NOT_A_STRING_ID; +} + +void EvaluableNode::AppendCommentsStringId(StringInternPool::StringID comments_string_id) +{ + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + if(GetCommentsStringId() == string_intern_pool.NOT_A_STRING_ID) + { + SetCommentsStringId(comments_string_id); + } + else //already has comments, so append more + { + std::string appended = GetCommentsString(); + appended.append(string_intern_pool.GetStringFromID(comments_string_id)); + + SetComments(appended); + } +} + +void EvaluableNode::AppendComments(const std::string &comment) +{ + if(!HasExtendedValue()) + EnsureEvaluableNodeExtended(); + + if(GetCommentsStringId() == string_intern_pool.NOT_A_STRING_ID) + { + SetComments(comment); + } + else //already has comments, so append more + { + std::string appended = GetCommentsString(); + appended.append(comment); + + SetComments(appended); + } +} + +size_t EvaluableNode::GetNumChildNodes() +{ + if(IsEvaluableNodeTypeImmediate(GetType())) + return 0; + + if(IsAssociativeArray()) + return GetMappedChildNodesReference().size(); + else + return GetOrderedChildNodesReference().size(); + + return 0; +} + +void EvaluableNode::InitOrderedChildNodes() +{ + DestructValue(); + + //if can't hold a list, then just set it to the most generic type + if(!DoesEvaluableNodeTypeUseOrderedData(GetType())) + type = ENT_LIST; + + if(HasExtendedValue()) + value.extension.extendedValue->value.ConstructOrderedChildNodes(); + else + value.ConstructOrderedChildNodes(); +} + +void EvaluableNode::SetOrderedChildNodes(const std::vector &ocn) +{ + if(!IsOrderedArray()) + return; + + GetOrderedChildNodesReference() = ocn; + + //if cycles, propagate upward + SetNeedCycleCheck(false); + for(auto cn : ocn) + { + if(cn != nullptr && cn->GetNeedCycleCheck()) + { + SetNeedCycleCheck(true); + break; + } + } + + //set idempotency + if(GetNumLabels() == 0) + { + //if potentially idempotent, then see if it is + if(IsEvaluableNodeTypePotentiallyIdempotent(type)) + { + SetIsIdempotent(true); + for(auto cn : ocn) + { + if(cn != nullptr && !cn->GetIsIdempotent()) + { + SetIsIdempotent(false); + break; + } + } + } + } +} + +void EvaluableNode::ClearOrderedChildNodes() +{ + if(!IsOrderedArray()) + return; + + GetOrderedChildNodes().clear(); + + SetNeedCycleCheck(false); + + if(GetNumLabels() == 0) + SetIsIdempotent(IsEvaluableNodeTypePotentiallyIdempotent(type)); +} + +void EvaluableNode::AppendOrderedChildNode(EvaluableNode *cn) +{ + if(!IsOrderedArray()) + return; + + GetOrderedChildNodesReference().push_back(cn); + + if(cn != nullptr) + { + //if cycles, propagate upward + if(cn->GetNeedCycleCheck()) + SetNeedCycleCheck(true); + + //propagate idempotency + if(!cn->GetIsIdempotent()) + SetIsIdempotent(false); + } +} + +void EvaluableNode::AppendOrderedChildNodes(const std::vector &ocn_to_append) +{ + if(!IsOrderedArray()) + return; + + auto &ocn = GetOrderedChildNodesReference(); + ocn.insert(end(ocn), begin(ocn_to_append), end(ocn_to_append)); + + //if cycles, propagate upward + for(auto cn : ocn_to_append) + { + if(cn != nullptr && cn->GetNeedCycleCheck()) + { + SetNeedCycleCheck(true); + break; + } + } + + //propagate idempotency + if(GetIsIdempotent()) + { + 
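+		//only the newly appended child nodes need checking; existing ones are already reflected in the flag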
for(auto cn : ocn_to_append) + { + if(cn != nullptr && !cn->GetIsIdempotent()) + { + SetIsIdempotent(false); + break; + } + } + } +} + +void EvaluableNode::InitMappedChildNodes() +{ + DestructValue(); + + if(!HasExtendedValue()) + value.ConstructMappedChildNodes(); + else + value.extension.extendedValue->value.ConstructMappedChildNodes(); +} + +EvaluableNode **EvaluableNode::GetMappedChildNode(const StringInternPool::StringID sid) +{ + auto &mcn = GetMappedChildNodes(); + auto node_iter = mcn.find(sid); + if(node_iter == end(mcn)) + return nullptr; + + //return the location of the child pointer + return &node_iter->second; +} + +EvaluableNode **EvaluableNode::GetOrCreateMappedChildNode(const std::string &id) +{ + auto &mcn = GetMappedChildNodesReference(); + + //create a reference in case it doesn't exist yet + StringInternPool::StringID sid = string_intern_pool.CreateStringReference(id); + + auto [inserted_node, inserted] = mcn.insert(std::make_pair(sid, nullptr)); + + //if the node was not inserted, then don't need the reference created + if(!inserted) + string_intern_pool.DestroyStringReference(sid); + + //return the location of the child pointer + return &inserted_node->second; +} + +EvaluableNode **EvaluableNode::GetOrCreateMappedChildNode(const StringInternPool::StringID sid) +{ + auto &mcn = GetMappedChildNodesReference(); + auto [inserted_node, inserted] = mcn.insert(std::make_pair(sid, nullptr)); + + //if the node was inserted, then create a reference + if(inserted) + string_intern_pool.CreateStringReference(sid); + + return &inserted_node->second; +} + +void EvaluableNode::SetMappedChildNodes(AssocType &new_mcn, bool copy) +{ + if(!IsAssociativeArray()) + return; + + auto &mcn = GetMappedChildNodesReference(); + + //create new references before freeing old ones + string_intern_pool.CreateStringReferences(new_mcn, [](auto n) { return n.first; }); + + //destroy any string refs for map + string_intern_pool.DestroyStringReferences(mcn, [](auto n) { return n.first; }); + + //swap map heap memory with new_mcn + if(copy) + mcn = new_mcn; + else + mcn.swap(new_mcn); + + //if cycles, propagate upward + SetNeedCycleCheck(false); + for(auto &[_, cn] : mcn) + { + if(cn != nullptr && cn->GetNeedCycleCheck()) + { + SetNeedCycleCheck(true); + break; + } + } + + //set idempotency + if(GetNumLabels() == 0) + { + //if potentially idempotent, then see if it is + if(IsEvaluableNodeTypePotentiallyIdempotent(type)) + { + SetIsIdempotent(true); + for(auto &[_, cn] : mcn) + { + if(cn != nullptr && !cn->GetIsIdempotent()) + { + SetIsIdempotent(false); + break; + } + } + } + } +} + +std::pair EvaluableNode::SetMappedChildNode(const std::string &id, EvaluableNode *node, bool overwrite) +{ + if(!IsAssociativeArray()) + return std::make_pair(false, nullptr); + + auto &mcn = GetMappedChildNodesReference(); + + StringInternPool::StringID sid = string_intern_pool.CreateStringReference(id); + + //try to insert; if fail, then need to remove extra reference and update node + auto [inserted_node, inserted] = mcn.insert(std::make_pair(sid, node)); + if(!inserted) + { + string_intern_pool.DestroyStringReference(sid); + if(!overwrite) + return std::make_pair(false, &inserted_node->second); + } + + //set node regardless of whether it was added + inserted_node->second = node; + + if(node != nullptr) + { + //if cycles, propagate upward + if(node->GetNeedCycleCheck()) + SetNeedCycleCheck(true); + + //propagate idempotency + if(!node->GetIsIdempotent()) + SetIsIdempotent(false); + } + + return std::make_pair(true, 
&inserted_node->second); +} + +std::pair EvaluableNode::SetMappedChildNode(const StringInternPool::StringID sid, EvaluableNode *node, bool overwrite) +{ + if(!IsAssociativeArray()) + return std::make_pair(false, nullptr); + + auto &mcn = GetMappedChildNodesReference(); + + auto [inserted_node, inserted] = mcn.insert(std::make_pair(sid, node)); + + if(inserted) + string_intern_pool.CreateStringReference(sid); //create string reference if pair was successfully set/added + else + { + //if not overwriting, return if sid is already found + if(!overwrite) + return std::make_pair(false, &inserted_node->second); + + //update the value + inserted_node->second = node; + } + + if(node != nullptr) + { + //if cycles, propagate upward + if(node->GetNeedCycleCheck()) + SetNeedCycleCheck(true); + + //propagate idempotency + if(!node->GetIsIdempotent()) + SetIsIdempotent(false); + } + + return std::make_pair(true, &inserted_node->second); +} + +bool EvaluableNode::SetMappedChildNodeWithReferenceHandoff(const StringInternPool::StringID sid, EvaluableNode *node, bool overwrite) +{ + if(!IsAssociativeArray()) + { + string_intern_pool.DestroyStringReference(sid); + return false; + } + + auto &mcn = GetMappedChildNodesReference(); + + auto [inserted_node, inserted] = mcn.insert(std::make_pair(sid, node)); + + if(!inserted) + { + //destroy the reference that was passed in, since this node already has a reference + string_intern_pool.DestroyStringReference(sid); + if(!overwrite) + return false; //if not overwriting, return if sid is already found + + //update the value + inserted_node->second = node; + } + + if(node != nullptr) + { + //if cycles, propagate upward + if(node->GetNeedCycleCheck()) + SetNeedCycleCheck(true); + + //propagate idempotency + if(!node->GetIsIdempotent()) + SetIsIdempotent(false); + } + + return true; +} + +void EvaluableNode::ClearMappedChildNodes() +{ + if(!IsAssociativeArray()) + return; + + auto &map = GetMappedChildNodes(); + string_intern_pool.DestroyStringReferences(map, [](auto n) { return n.first; }); + map.clear(); + + SetNeedCycleCheck(false); + + if(GetNumLabels() == 0) + SetIsIdempotent(IsEvaluableNodeTypePotentiallyIdempotent(type)); +} + +EvaluableNode *EvaluableNode::EraseMappedChildNode(const StringInternPool::StringID sid) +{ + auto &mcn = GetMappedChildNodes(); + //attempt to find + auto found = mcn.find(sid); + if(found == end(mcn)) + return nullptr; + + //erase and return the value + string_intern_pool.DestroyStringReference(sid); + EvaluableNode *erased_value = found->second; + mcn.erase(found); + return erased_value; +} + +void EvaluableNode::AppendMappedChildNodes(AssocType &mcn_to_append) +{ + if(!IsAssociativeArray()) + return; + + auto &mcn = GetMappedChildNodesReference(); + mcn.reserve(mcn.size() + mcn_to_append.size()); + + //insert everything + for(auto &[n_id, n] : mcn_to_append) + { + auto [inserted_node, inserted] = mcn.insert(std::make_pair(n_id, n)); + + if(inserted) + string_intern_pool.CreateStringReference(n_id); //create string reference if pair was successfully set/added + else //overwrite + inserted_node->second = n; + + if(n != nullptr) + { + //if cycles, propagate upward + if(n->GetNeedCycleCheck()) + SetNeedCycleCheck(true); + + //propagate idempotency + if(!n->GetIsIdempotent()) + SetIsIdempotent(false); + } + } +} + +void EvaluableNode::EnsureEvaluableNodeExtended() +{ + if(HasExtendedValue()) + return; + + EvaluableNodeExtendedValue *ev = new EvaluableNodeExtendedValue; + + switch(GetType()) + { + case ENT_NUMBER: + 
ev->value.numberValueContainer.numberValue = value.numberValueContainer.numberValue; + if(value.numberValueContainer.labelStringID != StringInternPool::NOT_A_STRING_ID) + ev->labelsStringIds.push_back(value.numberValueContainer.labelStringID); + break; + case ENT_STRING: + case ENT_SYMBOL: + ev->value.stringValueContainer.stringID = value.stringValueContainer.stringID; + if(value.stringValueContainer.labelStringID != StringInternPool::NOT_A_STRING_ID) + ev->labelsStringIds.push_back(value.stringValueContainer.labelStringID); + break; + case ENT_ASSOC: + ev->value.ConstructMappedChildNodes(); //construct an empty mappedChildNodes to swap out + std::swap(ev->value.mappedChildNodes, value.mappedChildNodes); + break; + //otherwise it's uninitialized, so treat as ordered + default: //all other opcodes + ev->value.ConstructOrderedChildNodes(); //construct an empty orderedChildNodes to swap out + std::swap(ev->value.orderedChildNodes, value.orderedChildNodes); + break; + } + + attributes.individualAttribs.hasExtendedValue = true; + value.extension.extendedValue = ev; + value.extension.commentsStringId = StringInternPool::NOT_A_STRING_ID; +} + +void EvaluableNode::DestructValue() +{ + if(!HasExtendedValue()) + { + switch(GetType()) + { + case ENT_NUMBER: + string_intern_pool.DestroyStringReference(value.numberValueContainer.labelStringID); + break; + case ENT_STRING: + case ENT_SYMBOL: + string_intern_pool.DestroyStringReference(value.stringValueContainer.stringID); + string_intern_pool.DestroyStringReference(value.stringValueContainer.labelStringID); + break; + case ENT_ASSOC: + value.DestructMappedChildNodes(); + break; + //otherwise it's uninitialized, so treat as ordered + default: + value.DestructOrderedChildNodes(); + break; + } + } + else + { + switch(GetType()) + { + case ENT_NUMBER: + //don't need to do anything + break; + case ENT_STRING: + case ENT_SYMBOL: + string_intern_pool.DestroyStringReference(value.extension.extendedValue->value.stringValueContainer.stringID); + break; + case ENT_ASSOC: + value.extension.extendedValue->value.DestructMappedChildNodes(); + break; + //otherwise it's uninitialized, so treat as ordered + default: + value.extension.extendedValue->value.DestructOrderedChildNodes(); + break; + } + } +} + +void EvaluableNode::Invalidate() +{ + if(!HasExtendedValue()) + { + switch(GetType()) + { + case ENT_DEALLOCATED: + return; + case ENT_NUMBER: + string_intern_pool.DestroyStringReference(value.numberValueContainer.labelStringID); + break; + case ENT_STRING: + case ENT_SYMBOL: + string_intern_pool.DestroyStringReference(value.stringValueContainer.stringID); + string_intern_pool.DestroyStringReference(value.stringValueContainer.labelStringID); + break; + case ENT_ASSOC: + value.DestructMappedChildNodes(); + break; + //otherwise it's uninitialized, so treat as ordered + default: + value.DestructOrderedChildNodes(); + break; + } + + //return early if no extended value, make sure to clear out data so it isn't double-deleted + type = ENT_DEALLOCATED; + attributes.allAttributes = 0; + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + return; + } + + //has extended type + switch(GetType()) + { + case ENT_NUMBER: + //don't need to do anything + break; + case ENT_STRING: + case ENT_SYMBOL: + string_intern_pool.DestroyStringReference(value.extension.extendedValue->value.stringValueContainer.stringID); + break; + case ENT_ASSOC: + value.extension.extendedValue->value.DestructMappedChildNodes(); + break; + //otherwise it's uninitialized, so treat as 
ordered + default: + value.extension.extendedValue->value.DestructOrderedChildNodes(); + break; + } + + //delete extended if haven't returned yet + string_intern_pool.DestroyStringReferences(value.extension.extendedValue->labelsStringIds); + + string_intern_pool.DestroyStringReference(value.extension.commentsStringId); + + delete value.extension.extendedValue; + + type = ENT_DEALLOCATED; + attributes.allAttributes = 0; + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; +} + +bool EvaluableNode::AreDeepEqualGivenShallowEqual(EvaluableNode *a, EvaluableNode *b, ReferenceAssocType *checked) +{ + //if either is a null and have same number of child nodes, then equal + if(a == nullptr || b == nullptr) + return true; + + if(checked != nullptr) + { + //try to record this as a new pair that is checked + auto [inserted_entry, inserted] = checked->insert(std::make_pair(a, b)); + + //if the entry for a already exists + if(!inserted) + { + //if it doesn't match, then there's an odd cycle and the graph structures don't match + if(inserted_entry->second != b) + return false; + + //already validated that these were equal + return true; + } + } + + //immediate values have no child nodes, so since shallow equal, they're equal + if(a->IsImmediate()) + return true; + + if(a->IsAssociativeArray()) + { + //if a is associative, b must be too, since they're shallow equal + auto &a_mcn = a->GetMappedChildNodesReference(); + auto &b_mcn = b->GetMappedChildNodesReference(); + size_t a_size = a_mcn.size(); + if(a_size != b_mcn.size()) + return false; + + //both empty, so equal + if(a_size == 0) + return true; + + for(auto &[s_id, s] : a_mcn) + { + //make sure it can be found + auto b_found = b_mcn.find(s_id); + if(b_found == end(b_mcn)) + return false; + + EvaluableNode *a_child = s; + EvaluableNode *b_child = b_found->second; + + //if pointers are the same, then they are the same + if(a_child == b_child) + continue; + + //first check if the immediate values are equal + if(!AreShallowEqual(a_child, b_child)) + return false; + + //now check deep values + if(!EvaluableNode::AreDeepEqualGivenShallowEqual(a_child, b_child, checked)) + return false; + } + + //all child nodes are equal + return true; + } + + //if made it here, then both types are ordered + auto &a_ocn = a->GetOrderedChildNodesReference(); + auto &b_ocn = b->GetOrderedChildNodesReference(); + size_t a_size = a_ocn.size(); + if(a_size != b_ocn.size()) + return false; + + //both empty, so equal + if(a_size == 0) + return true; + + for(size_t i = 0; i < a_ocn.size(); i++) + { + EvaluableNode *a_child = a_ocn[i]; + EvaluableNode *b_child = b_ocn[i]; + + //if pointers are the same, then they are the same + if(a_child == b_child) + continue; + + //first check if the immediate values are equal + if(!AreShallowEqual(a_child, b_child)) + return false; + + //now check deep values + if(!EvaluableNode::AreDeepEqualGivenShallowEqual(a_child, b_child, checked)) + return false; + } + + //all child nodes are equal + return true; +} + +bool EvaluableNode::CanNodeTreeBeFlattenedRecurse(EvaluableNode *n, std::vector &stack) +{ + //do a linear find because the logarithmic size of depth should be small enough to make this faster + // than a ReferenceSetType + if(std::find(begin(stack), end(stack), n) != end(stack)) + return false; + + stack.push_back(n); + + //check child nodes + if(n->IsAssociativeArray()) + { + for(auto &[_, e] : n->GetMappedChildNodesReference()) + { + if(e == nullptr) + continue; + + if(!CanNodeTreeBeFlattenedRecurse(e, stack)) 
+ return false; + } + } + else if(!n->IsImmediate()) + { + for(auto &e : n->GetOrderedChildNodesReference()) + { + if(e == nullptr) + continue; + + if(!CanNodeTreeBeFlattenedRecurse(e, stack)) + return false; + } + } + + stack.pop_back(); + + //didn't find itself + return true; +} + +size_t EvaluableNode::GetDeepSizeRecurse(EvaluableNode *n, ReferenceSetType &checked) +{ + //try to insert. if fails, then it has already been inserted, so ignore + if(checked.insert(n).second == false) + return 0; + + //count this one + size_t size = 1; + + //count any labels + size += n->GetNumLabels(); + + //check child nodes + if(n->IsAssociativeArray()) + { + for(auto &[_, e] : n->GetMappedChildNodesReference()) + { + if(e != nullptr) + size += GetDeepSizeRecurse(e, checked); + } + } + else if(!n->IsImmediate()) + { + for(auto &e : n->GetOrderedChildNodesReference()) + { + if(e != nullptr) + size += GetDeepSizeRecurse(e, checked); + } + } + + return size; +} + +size_t EvaluableNode::GetDeepSizeNoCycleRecurse(EvaluableNode *n) +{ + //count this one + size_t size = 1; + + //count any labels + size += n->GetNumLabels(); + + //check child nodes + if(n->IsAssociativeArray()) + { + for(auto &[_, e] : n->GetMappedChildNodesReference()) + { + if(e != nullptr) + size += GetDeepSizeNoCycleRecurse(e); + } + } + else if(!n->IsImmediate()) + { + for(auto &e : n->GetOrderedChildNodesReference()) + { + if(e != nullptr) + size += GetDeepSizeNoCycleRecurse(e); + } + } + + return size; +} + +double EvaluableNode::zeroNumberValue = 0.0; +std::string EvaluableNode::emptyStringValue = ""; +EvaluableNode *EvaluableNode::emptyEvaluableNodeNullptr = nullptr; +std::vector EvaluableNode::emptyStringVector; +std::vector EvaluableNode::emptyStringIdVector; +std::vector EvaluableNode::emptyOrderedChildNodes; +EvaluableNode::AssocType EvaluableNode::emptyMappedChildNodes; diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h new file mode 100644 index 00000000..572cbd9e --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNode.h @@ -0,0 +1,1089 @@ +#pragma once + +//project headers: +#include "FastMath.h" +#include "HashMaps.h" +#include "Opcodes.h" +#include "PlatformSpecific.h" +#include "StringInternPool.h" +#include "StringManipulation.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//forward declarations: +class EvaluableNodeManager; + +class EvaluableNode +{ +public: + //set associative container types based on performance needs + + //referencing one EvaluableNode to another + using ReferenceAssocType = FastHashMap; + + //a set of EvaluableNode pointers + using ReferenceSetType = FastHashSet; + + //EvaluableNode pointer to count + using ReferenceCountType = FastHashMap; + + //lookup a keyword string and find the type + using KeywordLookupType = FastHashMap; + + using AssocType = CompactHashMap; + + //constructors + __forceinline EvaluableNode() { InitializeUnallocated(); } + __forceinline EvaluableNode(EvaluableNodeType type, const std::string &string_value) { InitializeType(type, string_value); } + __forceinline EvaluableNode(double value) { InitializeType(value); } + __forceinline EvaluableNode(EvaluableNodeType type) { InitializeType(type); } + __forceinline EvaluableNode(EvaluableNode *n) { InitializeType(n); } + + __forceinline ~EvaluableNode() + { + Invalidate(); + } + + //clears out all data and makes the unusable in the ENT_DEALLOCATED state + void Invalidate(); + + 
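+	//Usage sketch (comment only; node_a and node_b are hypothetical locals, not part of this class):
+	// the constructors above initialize a node directly, and Invalidate releases any string
+	// references and marks the node ENT_DEALLOCATED (the destructor does the same).
+	//	EvaluableNode node_a(ENT_STRING, "hello");	//interns "hello" and holds a reference
+	//	EvaluableNode node_b(3.0);					//ENT_NUMBER holding 3.0
+	//	node_a.Invalidate();						//releases the reference; type becomes ENT_DEALLOCATED
+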
/////////////////////////////////////////// + //Each InitializeType* sets up a given type with appropriate data + inline void InitializeType(EvaluableNodeType _type, const std::string &string_value) + { + type = _type; + attributes.allAttributes = 0; + attributes.individualAttribs.isIdempotent = true; + value.stringValueContainer.stringID = string_intern_pool.CreateStringReference(string_value); + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + } + + inline void InitializeType(EvaluableNodeType _type, StringInternPool::StringID string_id) + { + type = _type; + attributes.allAttributes = 0; + value.stringValueContainer.stringID = string_intern_pool.CreateStringReference(string_id); + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + } + + //like InitializeType, but hands off the string reference to string_id + inline void InitializeTypeWithReferenceHandoff(EvaluableNodeType _type, StringInternPool::StringID string_id) + { + type = _type; + attributes.allAttributes = 0; + value.stringValueContainer.stringID = string_id; + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + } + + constexpr void InitializeType(double number_value) + { + type = ENT_NUMBER; + attributes.allAttributes = 0; + attributes.individualAttribs.isIdempotent = true; + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + value.numberValueContainer.numberValue = number_value; + } + + //initializes to ENT_UNINITIALIZED + //useful to mark a node in a hold state before it's ready so it isn't counted as ENT_DEALLOCATED + //but also such that the fields don't need to be initialized or cleared + constexpr void InitializeUnallocated() + { + type = ENT_UNINITIALIZED; + } + + inline void InitializeType(EvaluableNodeType _type) + { + type = _type; + attributes.allAttributes = 0; + attributes.individualAttribs.isIdempotent = IsEvaluableNodeTypePotentiallyIdempotent(_type); + + if(DoesEvaluableNodeTypeUseNumberData(_type)) + { + value.numberValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + value.numberValueContainer.numberValue = 0.0; + attributes.individualAttribs.isIdempotent = true; + } + else if(DoesEvaluableNodeTypeUseStringData(_type)) + { + value.stringValueContainer.stringID = StringInternPool::NOT_A_STRING_ID; + value.stringValueContainer.labelStringID = StringInternPool::NOT_A_STRING_ID; + attributes.individualAttribs.isIdempotent = (_type == ENT_STRING); + } + else if(DoesEvaluableNodeTypeUseAssocData(_type)) + { + type = _type; + attributes.allAttributes = 0; + attributes.individualAttribs.isIdempotent = true; + value.ConstructMappedChildNodes(); + } + else + { + value.ConstructOrderedChildNodes(); + } + } + + //sets the value of the node to that of n and the copy_* parameters indicate what metadata should be copied + void InitializeType(EvaluableNode *n, bool copy_labels = true, bool copy_comments_and_concurrency = true); + + //copies the EvaluableNode n into this. Does not overwrite labels or comments. 
+ void CopyValueFrom(EvaluableNode *n); + + //copies the metadata of the node n into this + void CopyMetadataFrom(EvaluableNode *n); + + //clears the node's metadata + __forceinline void ClearMetadata() + { + ClearComments(); + ClearLabels(); + SetConcurrency(false); + } + + //Evaluates the fraction of the labels of nodes that are the same, 1.0 if no labels on either + //num_common_labels and num_unique_labels are set to the appropriate number in common and number of labels that are unique when the two sets are merged + static void GetNodeCommonAndUniqueLabelCounts(EvaluableNode *n1, EvaluableNode *n2, size_t &num_common_labels, size_t &num_unique_labels); + + //Returns true if the immediate data structure of a is equal to b + static bool AreShallowEqual(EvaluableNode *a, EvaluableNode *b); + + //Returns true if the entire data structure of a is equal in value to the data structure of b + static inline bool AreDeepEqual(EvaluableNode *a, EvaluableNode *b) + { + //if pointers are the same, then they are the same + if(a == b) + return true; + + //first check if the immediate values are equal + if(!AreShallowEqual(a, b)) + return false; + + bool need_cycle_checks = false; + + //since they are shallow equal, check for quick exit + if(a != nullptr && b != nullptr) + { + if(IsEvaluableNodeTypeImmediate(a->GetType()) + && IsEvaluableNodeTypeImmediate(b->GetType())) + return true; + + //only need cycle checks if both a and b need cycle checks, + // otherwise, one will become exhausted and end the comparison + if(a->GetNeedCycleCheck() && b->GetNeedCycleCheck()) + need_cycle_checks = true; + } + + if(need_cycle_checks) + { + ReferenceAssocType checked; + return AreDeepEqualGivenShallowEqual(a, b, &checked); + } + else + { + return AreDeepEqualGivenShallowEqual(a, b, nullptr); + } + } + + //Returns true if this node evaluates to true + static bool IsTrue(EvaluableNode *n); + + //returns true if it is explicitly a string + constexpr bool IsStringValue() + { + return (GetType() == ENT_STRING); + } + + //returns true if it is explicitly a string + static constexpr bool IsStringValue(EvaluableNode *n) + { + if(n == nullptr) + return false; + return n->IsStringValue(); + } + + //Returns true if the node is some form of associative array + constexpr bool IsAssociativeArray() + { + return DoesEvaluableNodeTypeUseAssocData(GetType()); + } + + //Returns true if the node is some form of associative array + static constexpr bool IsAssociativeArray(EvaluableNode *n) + { + if(n == nullptr) + return false; + return n->IsAssociativeArray(); + } + + //returns true if the type is immediate + constexpr bool IsImmediate() + { + return IsEvaluableNodeTypeImmediate(GetType()); + } + + //Returns true if the node is some form of ordered array + constexpr bool IsOrderedArray() + { + return DoesEvaluableNodeTypeUseOrderedData(GetType()); + } + + //Returns true if the node is some form of ordered array + static constexpr bool IsOrderedArray(EvaluableNode *n) + { + if(n == nullptr) + return false; + return n->IsOrderedArray(); + } + + //returns true if the EvaluableNode is of a query type + static constexpr bool IsQuery(EvaluableNode *n) + { + return (n != nullptr && IsEvaluableNodeTypeQuery(n->GetType())); + } + + //Returns the value requested from an associative array regardless of whether the associative array has been interpreted + // into mappedChildNodes or is still in orderedChildNodes + //returns nullptr if not found + static EvaluableNode *RetrieveImmediateAssocValue(EvaluableNode *n, const std::string &key); + + 
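+	//Usage sketch (comment only; x and y are hypothetical node pointers, not part of this class):
+	// AreShallowEqual compares only the immediate type and value, while AreDeepEqual recurses into
+	// child nodes and only allocates a cycle-tracking map when both nodes need cycle checks.
+	//	bool same_value = EvaluableNode::AreShallowEqual(x, y);	//immediate data only
+	//	bool same_tree = EvaluableNode::AreDeepEqual(x, y);		//full structural comparison
+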
//Returns positive if a is less than b, + // negative if greater, or 0 if equal or not numerically comparable + static int Compare(EvaluableNode *a, EvaluableNode *b); + + //Returns true if the node b is less than node a. If or_equal_to is true, then also returns true if equal + static inline bool IsLessThan(EvaluableNode *a, EvaluableNode *b, bool or_equal_to) + { + int r = Compare(a, b); + if(r < 0) + return true; + if(or_equal_to && r == 0) + return true; + return false; + } + + static inline bool IsStrictlyLessThan(EvaluableNode *a, EvaluableNode *b) + { + return IsLessThan(a, b, false); + } + + //if the node's contents can be represented as a number, which includes numbers, infinity, and even null and NaN, then return true + // otherwise returns false + static constexpr bool CanRepresentValueAsANumber(EvaluableNode *e) + { + if(e == nullptr) + return true; + + switch(e->GetType()) + { + case ENT_NUMBER: + case ENT_TRUE: + case ENT_FALSE: + case ENT_NULL: + return true; + default: + return false; + } + } + + //returns true is node pointer e is nullptr or value of e has type ENT_NULL + static constexpr bool IsNull(EvaluableNode *e) + { + return (e == nullptr || e->GetType() == ENT_NULL); + } + + //returns true if node pointer e resolves to NaN (not a number) when interpreted as a number + static constexpr bool IsNaN(EvaluableNode *e) + { + return (IsNull(e) || FastIsNaN(e->GetNumberValue())); + } + + //returns true if node pointer e resolves to NaS (not a string) when interpreted as a string + static constexpr bool IsNaS(EvaluableNode *e) + { + return (IsNull(e) || e->GetStringID() == string_intern_pool.NOT_A_STRING_ID); + } + + //returns true if node pointer is nullptr, ENT_NULL, NaN number, or NaS string + static constexpr bool IsEmptyNode(EvaluableNode *e) + { + return (IsNull(e) + || (e->IsNativelyNumeric() && FastIsNaN(e->GetNumberValue())) + || (e->IsNativelyString() && e->GetStringID() == string_intern_pool.NOT_A_STRING_ID) ); + } + + //Converts the node to a number + //if null, then will return value_if_null + static double ToNumber(EvaluableNode *e, double value_if_null = std::numeric_limits::quiet_NaN()); + + //returns true if the node can directly be interpreted as a number + static constexpr bool IsNativelyNumeric(EvaluableNode *e) + { + if(e == nullptr) + return true; + + auto type = e->GetType(); + if(type == ENT_NUMBER || type == ENT_NULL) + return true; + + return false; + } + + //returns true if the EvaluableNode uses numeric data + constexpr bool IsNativelyNumeric() + { + return DoesEvaluableNodeTypeUseNumberData(GetType()); + } + + //returns true if the EvaluableNode uses string data + constexpr bool IsNativelyString() + { + return DoesEvaluableNodeTypeUseStringData(GetType()); + } + + //Converts a number to a string in a consistent way that should be used for anything dealing with EvaulableNode + static __forceinline std::string NumberToString(double value) + { + return StringManipulation::NumberToString(value); + } + + static __forceinline std::string NumberToString(size_t value) + { + return StringManipulation::NumberToString(value); + } + + //Converts the node to a string + const static std::string ToString(EvaluableNode *e); + + //converts node to an existing string. If it doesn't exist, it returns NOT_A_STRING_ID + static StringInternPool::StringID ToStringIDIfExists(EvaluableNode *e); + + //converts node to a string. 
Creates a reference to the string that must be destroyed, regardless of whether the string existed or not (if it did not exist, then it creates one) + static StringInternPool::StringID ToStringIDWithReference(EvaluableNode *e); + + //converts node to a string. Creates a reference to the string that must be destroyed, regardless of whether the string existed or not + // if e is a string, it will clear it and hand the reference to the caller + static StringInternPool::StringID ToStringIDTakingReferenceAndClearing(EvaluableNode *e); + + //returns the comments as a new string + static inline StringInternPool::StringID GetCommentsStringId(EvaluableNode *e) + { + if(e == nullptr) + return StringInternPool::NOT_A_STRING_ID; + return e->GetCommentsStringId(); + } + + //Converts the node to an ENT_ASSOC where the keys are the numbers of the indices + void ConvertOrderedListToNumberedAssoc(); + + //returns true if the node can be flattened, + // that is, contains no cycles when traversing downward and potentially + // duplicating nodes if they are referenced more than once + static inline bool CanNodeTreeBeFlattened(EvaluableNode *n) + { + if(n == nullptr) + return true; + + if(!n->GetNeedCycleCheck()) + return true; + + std::vector stack; + return CanNodeTreeBeFlattenedRecurse(n, stack); + } + + //Returns the number of nodes in the data structure + static inline size_t GetDeepSize(EvaluableNode *n) + { + if(n == nullptr) + return 0; + + if(!n->GetNeedCycleCheck()) + { + return GetDeepSizeNoCycleRecurse(n); + } + else + { + ReferenceSetType checked; + return GetDeepSizeRecurse(n, checked); + } + } + + //Returns the number of bytes of memory that node is currently using + static size_t GetEstimatedNodeSizeInBytes(EvaluableNode *n); + + //gets current type + constexpr EvaluableNodeType &GetType() + { + return type; + } + + //transforms node to new_type, converting data if types are different + // enm is used if it needs to allocate nodes when changing types + //if attempt_to_preserve_immediate_value is true, then it will try to preserve any relevant immediate value + // attempt_to_preserve_immediate_value should be set to false if the value will be immediately overwritten + void SetType(EvaluableNodeType new_type, EvaluableNodeManager *enm, + bool attempt_to_preserve_immediate_value = true); + + //fully clears node and sets it to new_type + inline void ClearAndSetType(EvaluableNodeType new_type) + { + DestructValue(); + InitializeType(new_type); + } + + //sets up number value + void InitNumberValue(); + + //gets the value by reference + constexpr double &GetNumberValue() + { + if(DoesEvaluableNodeTypeUseNumberData(GetType())) + return GetNumberValueReference(); + + //none of the above, return an empty one + return zeroNumberValue; + } + + //sets the number value + inline void SetNumberValue(double v) + { + if(DoesEvaluableNodeTypeUseNumberData(GetType())) + GetNumberValueReference() = v; + } + + //sets up the ability to contain a string + void InitStringValue(); + constexpr StringInternPool::StringID GetStringID() + { + if(DoesEvaluableNodeTypeUseStringData(GetType())) + return GetStringIDReference(); + + return StringInternPool::NOT_A_STRING_ID; + } + void SetStringID(StringInternPool::StringID id); + const std::string &GetStringValue(); + void SetStringValue(const std::string &v); + //gets the string ID and clears the node's string ID, but does not destroy the string reference, + // leaving the reference handling up to the caller + StringInternPool::StringID GetAndClearStringIDWithReference(); + 
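+	//Reference-handling sketch (comment only; src and dest are hypothetical node pointers, with dest
+	// assumed to hold string data): the *WithReference functions hand the caller a string reference
+	// that must be handed off or destroyed exactly once.
+	//	StringInternPool::StringID sid = EvaluableNode::ToStringIDWithReference(src);	//caller holds a reference
+	//	dest->SetStringIDWithReferenceHandoff(sid);	//dest takes ownership of that reference
+	//	//or, if the id is not kept: string_intern_pool.DestroyStringReference(sid);
+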
//sets the string but does not create a new reference because the reference has already been created + void SetStringIDWithReferenceHandoff(StringInternPool::StringID id); + + //functions for getting and setting labels by string or by StringID + // all Label functions perform any reference counting management necessary when setting and clearing + std::vector GetLabelsStringIds(); + std::vector GetLabelsStrings(); + void SetLabelsStringIds(const std::vector &label_string_ids); + size_t GetNumLabels(); + const std::string &GetLabel(size_t label_index); + const StringInternPool::StringID GetLabelStringId(size_t label_index); + void RemoveLabel(size_t label_index); + void ClearLabels(); + //reserves the specified number of labels + void ReserveLabels(size_t num_labels); + //if handoff_reference is true, then it will not create a new reference but assume one has already been created + void AppendLabelStringId(StringInternPool::StringID label_string_id, bool handoff_reference = false); + void AppendLabel(const std::string &label); + + //functions for getting and setting node comments by string or by StringID + // all Comment functions perform any reference counting management necessary when setting and clearing + StringInternPool::StringID GetCommentsStringId(); + inline const std::string &GetCommentsString() + { + return string_intern_pool.GetStringFromID(GetCommentsStringId()); + } + + //returns true if has comments + inline bool HasComments() + { + return GetCommentsStringId() != string_intern_pool.NOT_A_STRING_ID; + } + + //splits comment lines and returns a vector of strings of the comment + std::vector GetCommentsSeparateLines(); + //if handoff_reference is true, then it will not create a new reference but assume one has already been created + void SetCommentsStringId(StringInternPool::StringID comments_string_id, bool handoff_reference = false); + void SetComments(const std::string &comments); + void ClearComments(); + void AppendCommentsStringId(StringInternPool::StringID comments_string_id); + void AppendComments(const std::string &comments); + + //returns true if the EvaluableNode is marked with preference for concurrency + constexpr bool GetConcurrency() + { + return attributes.individualAttribs.concurrent; + } + + //sets the EvaluableNode's preference for concurrency + constexpr void SetConcurrency(bool concurrent) + { + attributes.individualAttribs.concurrent = concurrent; + } + + //returns true if the EvaluableNode and all its dependents need to be checked for cycles + constexpr bool GetNeedCycleCheck() + { + return attributes.individualAttribs.needCycleCheck; + } + + //sets the EvaluableNode's needCycleCheck flag + constexpr void SetNeedCycleCheck(bool need_cycle_check) + { + attributes.individualAttribs.needCycleCheck = need_cycle_check; + } + + //returns true if the EvaluableNode and all its dependents are idempotent + constexpr bool GetIsIdempotent() + { + return attributes.individualAttribs.isIdempotent; + } + + //sets the EvaluableNode's idempotentcy flag + constexpr void SetIsIdempotent(bool is_idempotent) + { + attributes.individualAttribs.isIdempotent = is_idempotent; + } + + //marks n and all its parent nodes as needing a cycle check + //nodes_to_parent_nodes is a lookup, for each node the lookup is its parent + static inline void SetParentEvaluableNodesCycleChecks(EvaluableNode *n, ReferenceAssocType &nodes_to_parent_nodes) + { + //mark until/unless have found a cycle + while(n != nullptr && !n->GetNeedCycleCheck()) + { + n->SetNeedCycleCheck(true); + + //attempt to find 
parent + auto found_parent = nodes_to_parent_nodes.find(n); + if(found_parent == end(nodes_to_parent_nodes)) + return; + + n = found_parent->second; + } + } + + //returns the last garbage collection iteration of this node, 0 if it has not been set before + constexpr uint8_t GetGarbageCollectionIteration() + { + return attributes.individualAttribs.garbageCollectionIteration; + } + + //sets the garbage collection iteration of this node, which defaults to 0 + // values 1, 2, 3 are valid values + constexpr void SetGarbageCollectionIteration(uint8_t gc_collect_iteration) + { + attributes.individualAttribs.garbageCollectionIteration = gc_collect_iteration; + } + + //returns the number of child nodes regardless of mapped or ordered + size_t GetNumChildNodes(); + + void InitOrderedChildNodes(); + //preallocates to_reserve for appending, etc. + inline void ReserveOrderedChildNodes(size_t to_reserve) + { + if(IsOrderedArray()) + GetOrderedChildNodesReference().reserve(to_reserve); + } + + constexpr std::vector &GetOrderedChildNodes() + { + if(IsOrderedArray()) + return GetOrderedChildNodesReference(); + + return emptyOrderedChildNodes; + } + + //using ordered or mapped child nodes as appropriate, transforms into numeric values and passes into store_value + // if node is mapped child nodes, it will use element_names to order populate out and use default_value if any given id is not found + //will use num_expected_elements for immediate values + //store_nomeric_value takes in 3 parameters, the index, a bool if the value was found, and the EvaluableNode of the value + template + static inline void ConvertChildNodesAndStoreValue(EvaluableNode *node, std::vector &element_names, + size_t num_expected_elements, StoreValueFunction store_value) + { + if(node != nullptr) + { + if(node->IsAssociativeArray()) + { + auto &wn_mcn = node->GetMappedChildNodesReference(); + for(size_t i = 0; i < element_names.size(); i++) + { + EvaluableNode *value_en = nullptr; + bool found = false; + auto found_node = wn_mcn.find(element_names[i]); + if(found_node != end(wn_mcn)) + { + value_en = found_node->second; + found = true; + } + + store_value(i, found, value_en); + } + } + else if(node->IsImmediate()) + { + //fill in with the node's value + for(size_t i = 0; i < num_expected_elements; i++) + store_value(i, true, node); + } + else //ordered + { + auto &node_ocn = node->GetOrderedChildNodesReference(); + + for(size_t i = 0; i < node_ocn.size(); i++) + store_value(i, true, node_ocn[i]); + } + } + } + + //Note that ResizeOrderedChildNodes does not initialize new nodes, so they must be initialized by caller + inline void SetOrderedChildNodesSize(size_t new_size) + { + if(IsOrderedArray()) + GetOrderedChildNodesReference().resize(new_size); + } + + void SetOrderedChildNodes(const std::vector &ocn); + void ClearOrderedChildNodes(); + void AppendOrderedChildNode(EvaluableNode *cn); + void AppendOrderedChildNodes(const std::vector &ocn_to_append); + //if the OrderedChildNodes list was using extra memory (if it were resized to be smaller), this would attempt to free extra memory + inline void ReleaseOrderedChildNodesExtraMemory() + { + if(IsOrderedArray()) + GetOrderedChildNodesReference().shrink_to_fit(); + } + + void InitMappedChildNodes(); + //preallocates to_reserve for appending, etc. 
+ inline void ReserveMappedChildNodes(size_t to_reserve) + { + if(IsAssociativeArray()) + GetMappedChildNodesReference().reserve(to_reserve); + } + + constexpr AssocType &GetMappedChildNodes() + { + if(IsAssociativeArray()) + return GetMappedChildNodesReference(); + + return emptyMappedChildNodes; + } + + //if the id exists, returns a pointer to the pointer of the child node + // returns nullptr if the id doesn't exist + inline EvaluableNode **GetMappedChildNode(const std::string &id) + { + StringInternPool::StringID sid = string_intern_pool.GetIDFromString(id); + return GetMappedChildNode(sid); + } + //if the id exists, returns a pointer to the pointer of the child node + // returns nullptr if the id doesn't exist + EvaluableNode **GetMappedChildNode(const StringInternPool::StringID sid); + //returns a pointer to the pointer of the child node, creating it if necessary and populating it with a nullptr + EvaluableNode **GetOrCreateMappedChildNode(const std::string &id); + //returns a pointer to the pointer of the child node, creating it if necessary and populating it with a nullptr + EvaluableNode **GetOrCreateMappedChildNode(const StringInternPool::StringID sid); + // if copy is set to true, then it will copy the map, otherwise it will swap + void SetMappedChildNodes(AssocType &new_mcn, bool copy); + //if overwrite is true, then it will overwrite the value, otherwise it will only set it if it does not exist + // will return true if it was successfully written (false if overwrite is set to false and the key already exists), + // as well as a pointer to where the pointer is stored + std::pair SetMappedChildNode(const std::string &id, EvaluableNode *node, bool overwrite = true); + std::pair SetMappedChildNode(const StringInternPool::StringID sid, EvaluableNode *node, bool overwrite = true); + //like SetMappedChildNode, except the sid already has a reference that is being handed off to this EvaluableNode to manage + bool SetMappedChildNodeWithReferenceHandoff(const StringInternPool::StringID sid, EvaluableNode *node, bool overwrite = true); + void ClearMappedChildNodes(); + //returns the node erased + EvaluableNode *EraseMappedChildNode(const StringInternPool::StringID sid); + void AppendMappedChildNodes(AssocType &mcn_to_append); + +protected: + //defined since it is used as a pointer in EvaluableNodeValue + struct EvaluableNodeExtendedValue; +public: + + //returns true if value contains an extended type + constexpr bool HasExtendedValue() + { return attributes.individualAttribs.hasExtendedValue; } + + //assumes that the EvaluableNode is of type ENT_NUMBER, and returns the value by reference + constexpr double &GetNumberValueReference() + { + if(!HasExtendedValue()) + return value.numberValueContainer.numberValue; + else + return value.extension.extendedValue->value.numberValueContainer.numberValue; + } + + //assumes that the EvaluableNode is of type that holds a string, and returns the value by reference + constexpr StringInternPool::StringID &GetStringIDReference() + { + if(!HasExtendedValue()) + return value.stringValueContainer.stringID; + else + return value.extension.extendedValue->value.stringValueContainer.stringID; + } + + //assumes that the EvaluableNode has ordered child nodes, and returns the value by reference + constexpr std::vector &GetOrderedChildNodesReference() + { + if(!HasExtendedValue()) + return value.orderedChildNodes; + else + return value.extension.extendedValue->value.orderedChildNodes; + } + + //assumes that the EvaluableNode is has mapped child nodes, and returns 
the value by reference + constexpr AssocType &GetMappedChildNodesReference() + { + if(!HasExtendedValue()) + return value.mappedChildNodes; + else + return value.extension.extendedValue->value.mappedChildNodes; + } + + //if it is storing an immediate value and has room to store a label + constexpr bool HasCompactSingleLabelStorage() + { + return ((type == ENT_NUMBER || type == ENT_STRING || type == ENT_SYMBOL) && !HasExtendedValue()); + } + + //returns a reference to the storage location for a single label + // will only return valid results if HasCompactSingleLabelStorage() is true, so that should be called first + constexpr StringInternPool::StringID &GetCompactSingleLabelStorage() + { + if(type == ENT_NUMBER) + return value.numberValueContainer.labelStringID; + //else assume type == ENT_STRING || type == ENT_SYMBOL + return value.stringValueContainer.labelStringID; + } + +protected: + + //align to the nearest 2-bytes to minimize aligment issues but reduce the overall memory footprint + // while maintaining some alignment +#pragma pack(push, 2) + union EvaluableNodeValue + { + //take care of all setup and cleanup outside of the union + // default to numberValueContainer constructor to allow constexpr + inline EvaluableNodeValue() { } + inline ~EvaluableNodeValue() { } + + inline void ConstructOrderedChildNodes() + { new (&orderedChildNodes) std::vector; } + + inline void DestructOrderedChildNodes() + { orderedChildNodes.~vector(); } + + inline void ConstructMappedChildNodes() + { new (&mappedChildNodes) AssocType; } + + inline void DestructMappedChildNodes() + { + string_intern_pool.DestroyStringReferences(mappedChildNodes, [](auto n) { return n.first; }); + mappedChildNodes.~AssocType(); + } + + //ordered child nodes (when type requires it), meaning and number of childNodes is based on the type of the node + std::vector orderedChildNodes; + + //hash-mapped child nodes (when type requires it), meaning and number of childNodes is based on the type of the node + AssocType mappedChildNodes; + + //when type represents a string, holds the corresponding values + struct EvaluableNodeValueString + { + //string value + StringInternPool::StringID stringID; + + //allow up to one label -- only used when not part of an extended value + StringInternPool::StringID labelStringID; + } stringValueContainer; + + //when type represents a number, holds the corresponding value + struct EvaluableNodeValueNumber + { + //number value + double numberValue; + + //allow up to one label -- only used when not part of an extended value + StringInternPool::StringID labelStringID; + } numberValueContainer; + + struct EvaluableNodeExtension + { + //pointer to store any extra data if EvaluableNode needs multiple fields + EvaluableNodeExtendedValue *extendedValue; + + //comments that appear just above the code represented by this node + StringInternPool::StringID commentsStringId; + } extension; + }; +#pragma pack(pop) + + struct EvaluableNodeExtendedValue + { + //value stored here + EvaluableNodeValue value; + + //labels of the node for referencing and querying + std::vector labelsStringIds; + }; + + //makes sure that the extendedValue is set appropriately so that it can be used to hold additional data + void EnsureEvaluableNodeExtended(); + + //destructs the value so that the node can be reused + // note that the value should be considered uninitialized + void DestructValue(); + + //Returns true if the entire data structure of a is equal in value to the data structure of b + // but does not check the immediate nodes a 
and b to see if they are shallow equal (this is assumed to be done by the caller for performance) + // Assists the public function AreDeepEqual + // if checked is nullptr, then it won't check for cycles + static bool AreDeepEqualGivenShallowEqual(EvaluableNode *a, EvaluableNode *b, ReferenceAssocType *checked); + + //recursive helper function for CanNodeTreeBeFlattened + // assumes n is not nullptr + static bool CanNodeTreeBeFlattenedRecurse(EvaluableNode *n, std::vector &stack); + + //Returns the deep size, excluding nodes already checked + // Assists the public function GetDeepSize + static size_t GetDeepSizeRecurse(EvaluableNode *n, ReferenceSetType &checked); + + //Like GetDeepSizeRecurse, but assumes there are no cycles in n + static size_t GetDeepSizeNoCycleRecurse(EvaluableNode *n); + + EvaluableNodeValue value; + + //Executable/data type of the node + EvaluableNodeType type; + + //make sure this only takes up one byte +#pragma pack(push, 1) + union EvaluableNodeAttributesType + { + //quick way to initialize all attributes to 0 + uint8_t allAttributes; + struct + { + //if true, then contains an extended type + bool hasExtendedValue : 1; + //if true, then this node and any nodes it contains may have a cycle so needs to be checked + bool needCycleCheck : 1; + //if true, then this node and any nodes it contains are idempotent + bool isIdempotent : 1; + //if true, then the node is marked for concurrency + bool concurrent : 1; + //the iteration used for garbage collection; an EvaluableNode should be initialized to 0, + // and values 1-3 are reserved for garbage collection cycles + uint8_t garbageCollectionIteration : 2; + } individualAttribs; + }; +#pragma pack(pop) + + //fields contained within the current set of data + EvaluableNodeAttributesType attributes; + + //values used to be able to return a reference + static double zeroNumberValue; + static std::string emptyStringValue; + static EvaluableNode *emptyEvaluableNodeNullptr; + static std::vector emptyStringVector; + static std::vector emptyStringIdVector; + static std::vector emptyOrderedChildNodes; + static AssocType emptyMappedChildNodes; +}; + +//EvaluableNode type upper taxonomy for determining the most generic way +// concrete values can be stored for the EvaluableNode. 
It is intended to +// group types into the highest specificity that it is worth using to +// compare two values based on their collective types +enum EvaluableNodeImmediateValueType +{ + ENIVT_NOT_EXIST, //there is nothing to even hold the data + ENIVT_NULL, //no data being held + ENIVT_NUMBER, //number + ENIVT_STRING_ID, //stringID + ENIVT_CODE //code (more general than any of the above) +}; + +//structure that can hold the most immediate value type of an EvaluableNode +// EvaluableNodeImmediateValueType can be used to communicate which type of data is being held +union EvaluableNodeImmediateValue +{ + constexpr EvaluableNodeImmediateValue() + : number(std::numeric_limits::quiet_NaN()) + { } + + constexpr EvaluableNodeImmediateValue(double _number) + : number(_number) + { } + + constexpr EvaluableNodeImmediateValue(StringInternPool::StringID string_id) + : stringID(string_id) + { } + + constexpr EvaluableNodeImmediateValue(EvaluableNode *_code) + : code(_code) + { } + + constexpr EvaluableNodeImmediateValue(const EvaluableNodeImmediateValue &eniv) + : code(eniv.code) + { } + + constexpr EvaluableNodeImmediateValue &operator =(const EvaluableNodeImmediateValue &eniv) + { + code = eniv.code; + return *this; + } + + //copies the value from en and returns the EvaluableNodeConcreteValueType + EvaluableNodeImmediateValueType CopyValueFromEvaluableNode(EvaluableNode *en) + { + if(en == nullptr) + { + number = std::numeric_limits::quiet_NaN(); + return ENIVT_NULL; + } + + auto en_type = en->GetType(); + if(en_type == ENT_NULL) + { + number = std::numeric_limits::quiet_NaN(); + return ENIVT_NULL; + } + + if(en_type == ENT_NUMBER) + { + number = en->GetNumberValue(); + return ENIVT_NUMBER; + } + + if(en_type == ENT_STRING) + { + stringID = en->GetStringID(); + return ENIVT_STRING_ID; + } + + code = en; + return ENIVT_CODE; + } + + static bool AreEqual(EvaluableNodeImmediateValueType type_1, EvaluableNodeImmediateValue &value_1, + EvaluableNodeImmediateValueType type_2, EvaluableNodeImmediateValue &value_2) + { + if(type_1 != type_2) + return false; + + //types are the same, just use type_1 for reference + if(type_1 == ENIVT_NUMBER) + { + if(EqualIncludingNaN(value_1.number, value_2.number)) + return false; + } + else if(type_1 == ENIVT_STRING_ID) + { + if(value_1.stringID == value_2.stringID) + return false; + } + else + { + if(EvaluableNode::AreDeepEqual(value_1.code, value_2.code)) + return false; + } + + return true; + } + + double number; + StringInternPool::StringID stringID; + EvaluableNode *code; +}; + +//used for storing a value and type together +class EvaluableNodeImmediateValueWithType +{ +public: + constexpr EvaluableNodeImmediateValueWithType() + : nodeType(ENIVT_NULL) + { } + + constexpr EvaluableNodeImmediateValueWithType(double number) + : nodeType(ENIVT_NUMBER), nodeValue(number) + { } + + constexpr EvaluableNodeImmediateValueWithType(StringInternPool::StringID string_id) + : nodeType(ENIVT_STRING_ID), nodeValue(string_id) + { } + + constexpr EvaluableNodeImmediateValueWithType(EvaluableNode *code) + : nodeType(ENIVT_CODE), nodeValue(code) + { } + + constexpr EvaluableNodeImmediateValueWithType(const EvaluableNodeImmediateValueWithType &enimvwt) + : nodeType(enimvwt.nodeType), nodeValue(enimvwt.nodeValue) + { } + + constexpr EvaluableNodeImmediateValueWithType &operator =(const EvaluableNodeImmediateValueWithType &enimvwt) + { + nodeType = enimvwt.nodeType; + nodeValue = enimvwt.nodeValue; + return *this; + } + + //copies the value from en and returns the 
EvaluableNodeConcreteValueType + void CopyValueFromEvaluableNode(EvaluableNode *en) + { + if(en == nullptr) + { + nodeType = ENIVT_NULL; + nodeValue = EvaluableNodeImmediateValue(std::numeric_limits::quiet_NaN()); + return; + } + + auto en_type = en->GetType(); + if(en_type == ENT_NULL) + { + nodeType = ENIVT_NULL; + nodeValue = EvaluableNodeImmediateValue(std::numeric_limits::quiet_NaN()); + return; + } + + if(en_type == ENT_NUMBER) + { + nodeType = ENIVT_NUMBER; + nodeValue = EvaluableNodeImmediateValue(en->GetNumberValue()); + return; + } + + if(en_type == ENT_STRING) + { + nodeType = ENIVT_STRING_ID; + nodeValue = EvaluableNodeImmediateValue(en->GetStringID()); + return; + } + + nodeType = ENIVT_CODE; + nodeValue = EvaluableNodeImmediateValue(en); + } + + static inline bool AreEqual(EvaluableNodeImmediateValueWithType &a, EvaluableNodeImmediateValueWithType &b) + { + return EvaluableNodeImmediateValue::AreEqual(a.nodeType, a.nodeValue, b.nodeType, b.nodeValue); + } + + EvaluableNodeImmediateValueType nodeType; + EvaluableNodeImmediateValue nodeValue; +}; diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp new file mode 100644 index 00000000..8af81afe --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.cpp @@ -0,0 +1,897 @@ +//project headers: +#include "EvaluableNodeManagement.h" + +//system headers: +#include +#include +#include +#include + +const double EvaluableNodeManager::allocExpansionFactor = 1.5; +const ExecutionCycleCountCompactDelta EvaluableNodeManager::minCycleCountBetweenGarbageCollects = 150000; + +EvaluableNodeManager::EvaluableNodeManager() +{ + firstUnusedNodeIndex = 0; + executionCyclesSinceLastGarbageCollection = 0; +} + +EvaluableNodeManager::~EvaluableNodeManager() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); +#endif + + for(auto &n : nodes) + delete n; +} + +EvaluableNode *EvaluableNodeManager::AllocNode(EvaluableNode *original, EvaluableNodeMetadataModifier metadata_modifier) +{ + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(original, metadata_modifier == ENMM_NO_CHANGE, metadata_modifier != ENMM_REMOVE_ALL); + + if(metadata_modifier == ENMM_LABEL_ESCAPE_INCREMENT) + { + size_t num_labels = original->GetNumLabels(); + n->ReserveLabels(num_labels); + + //add # in front + for(size_t i = 0; i < num_labels; i++) + { + std::string label = "#" + original->GetLabel(i); + n->AppendLabel(label); + } + } + else if(metadata_modifier == ENMM_LABEL_ESCAPE_DECREMENT) + { + size_t num_labels = original->GetNumLabels(); + n->ReserveLabels(num_labels); + + //remove # in front + for(size_t i = 0; i < num_labels; i++) + { + std::string label = original->GetLabel(i); + if(label.size() > 0 && label[0] == '#') + label = label.substr(1); + + n->AppendLabel(label); + } + } + + return n; +} + +EvaluableNode *EvaluableNodeManager::AllocListNodeWithOrderedChildNodes(EvaluableNodeType child_node_type, size_t num_child_nodes) +{ + size_t num_allocated = 0; + size_t num_to_alloc = num_child_nodes + 1; + + EvaluableNode *retval = nullptr; + + //start off allocating the parent node, then switch to child_node_type + EvaluableNodeType cur_type = ENT_LIST; + + //ordered child nodes destination; preallocate outside of the lock (for performance) and swap in + std::vector *ocn_ptr = nullptr; + std::vector ocn_buffer; + ocn_buffer.resize(num_child_nodes); + + //outer loop needed for multithreading, but doesn't hurt anything for single threading + 
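+	//editorial sketch (not in the original sources): under the read lock each iteration claims slots
+	//optimistically through the atomic firstUnusedNodeIndex and rolls the claim back if it runs past
+	//the end of the pool, falling through to the write-lock expansion path below; conceptually:
+	//    size_t idx = firstUnusedNodeIndex++;                 //atomic claim
+	//    if(idx < nodes.size()) { /*reuse or allocate slot*/ }  //fast path
+	//    else { --firstUnusedNodeIndex; /*grow pool under write lock*/ }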
while(num_allocated < num_to_alloc) + { + + #ifdef MULTITHREAD_SUPPORT + //attempt to allocate as many as possible using an atomic without write locking + Concurrency::ReadLock lock(managerAttributesMutex); + #endif + + for(; num_allocated < num_to_alloc; num_allocated++) + { + //attempt to allocate a node and make sure it's valid + size_t allocated_index = firstUnusedNodeIndex++; + if(allocated_index < nodes.size()) + { + if(nodes[allocated_index] != nullptr) + { + //before releasing the lock, make sure it has an allocated type, otherwise it could get grabbed by another thread + nodes[allocated_index]->InitializeType(cur_type); + } + else //allocate if nullptr + nodes[allocated_index] = new EvaluableNode(cur_type); + + //if first node, populate the parent node + if(num_allocated == 0) + { + //prep parent node + retval = nodes[allocated_index]; + + //get the pointer to place child elements, + // but swap out the preallocated ordered child nodes + ocn_ptr = &retval->GetOrderedChildNodes(); + std::swap(ocn_buffer, *ocn_ptr); + + //advance type to child node type + cur_type = child_node_type; + } + else //set the appropritae child node + { + (*ocn_ptr)[num_allocated - 1] = nodes[allocated_index]; + } + } + else + { + //the node wasn't valid; put it back and do a write lock to allocate more + --firstUnusedNodeIndex; + break; + } + } + + //if have allocated enough, just return + if(num_allocated == num_to_alloc) + return retval; + + #ifdef MULTITHREAD_SUPPORT + + //don't have enough nodes, so need to attempt a write lock to allocate more + lock.unlock(); + Concurrency::WriteLock write_lock(managerAttributesMutex); + + //try again after write lock to allocate a node in case another thread has performed the allocation + //already have the write lock, so don't need to worry about another thread stealing firstUnusedNodeIndex + #endif + + size_t num_nodes = nodes.size(); + size_t num_nodes_needed = firstUnusedNodeIndex + (num_to_alloc - num_allocated); + + //if don't currently have enough free nodes to meet the needs, then expand the allocation + if(num_nodes_needed > num_nodes) + { + size_t nodes_to_allocate = static_cast(allocExpansionFactor * num_nodes_needed) + 1; + + //fill new EvaluableNode slots with nullptr + nodes.resize(num_nodes + nodes_to_allocate, nullptr); + } + } + + //shouldn't make it here + return retval; +} + +bool EvaluableNodeManager::RecommendGarbageCollection() +{ + //makes sure to perform garbage collection between every opcode to find memory reference errors +#ifdef PEDANTIC_GARBAGE_COLLECTION + return true; +#endif + +#ifdef MULTITHREAD_SUPPORT + if(executionCyclesSinceLastGarbageCollection > minCycleCountBetweenGarbageCollects * static_cast(Concurrency::threadPool.GetNumActiveThreads())) +#else + if(executionCyclesSinceLastGarbageCollection > minCycleCountBetweenGarbageCollects) +#endif + { + auto cur_size = GetNumberOfUsedNodes(); + + size_t next_expansion_size = static_cast(cur_size * allocExpansionFactor); + if(next_expansion_size < nodes.size()) + { + executionCyclesSinceLastGarbageCollection = 0; + return false; + } + + return true; + } + + return false; +} + +#ifdef MULTITHREAD_SUPPORT +void EvaluableNodeManager::CollectGarbage(Concurrency::ReadLock *memory_modification_lock) +#else +void EvaluableNodeManager::CollectGarbage() +#endif +{ + if(!RecommendGarbageCollection()) + return; + +#ifdef MULTITHREAD_SUPPORT + + //free lock so can attempt to enter write lock to collect garbage + if(memory_modification_lock != nullptr) + memory_modification_lock->unlock(); + + 
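+	//editorial note: the sequence below is, in outline, (1) drop the caller's shared lock, (2) spin on
+	//try_lock() while re-checking RecommendGarbageCollection() so threads that lose the race bail out,
+	//(3) the winner collects under the unique lock, and (4) every path re-acquires the shared lock
+	//before returning to the caller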
//keep trying to acquire write lock to see if this thread wins the race to collect garbage + Concurrency::WriteLock write_lock(memoryModificationMutex, std::defer_lock); + do + { + if(!RecommendGarbageCollection()) + { + if(memory_modification_lock != nullptr) + memory_modification_lock->lock(); + return; + } + + } while(!write_lock.try_lock()); + + //double-check still needs collection, and not that another thread collected it + if(!RecommendGarbageCollection()) + { + write_lock.unlock(); + if(memory_modification_lock != nullptr) + memory_modification_lock->lock(); + return; + } +#endif + + //perform garbage collection + FreeAllNodesExceptReferencedNodes(); + +#ifdef MULTITHREAD_SUPPORT + //free the unique lock and reacquire the shared lock + write_lock.unlock(); + if(memory_modification_lock != nullptr) + memory_modification_lock->lock(); +#endif +} + +void EvaluableNodeManager::FreeAllNodes() +{ + //get rid of any extra memory + for(size_t i = 0; i < firstUnusedNodeIndex; i++) + nodes[i]->Invalidate(); + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); +#endif + + firstUnusedNodeIndex = 0; + + //update details since last garbage collection + executionCyclesSinceLastGarbageCollection = 0; +} + +EvaluableNode *EvaluableNodeManager::AllocUninitializedNode() +{ +#ifdef MULTITHREAD_SUPPORT + //attempt to allocate using an atomic without write locking + Concurrency::ReadLock lock(managerAttributesMutex); + + //attempt to allocate a node and make sure it's valid + size_t allocated_index = firstUnusedNodeIndex++; + if(allocated_index < nodes.size()) + { + if(nodes[allocated_index] != nullptr) + { + //before releasing the lock, make sure it has an allocated type, otherwise it could get grabbed by another thread + nodes[allocated_index]->InitializeUnallocated(); + } + else //allocate if nullptr + nodes[allocated_index] = new EvaluableNode(); + + return nodes[allocated_index]; + } + //the node wasn't valid; put it back and do a write lock to allocate more + --firstUnusedNodeIndex; + + //don't have enough nodes, so need to attempt a write lock to allocate more + lock.unlock(); + Concurrency::WriteLock write_lock(managerAttributesMutex); + + //try again after write lock to allocate a node in case another thread has performed the allocation + //already have the write lock, so don't need to worry about another thread stealing firstUnusedNodeIndex +#endif + + size_t num_nodes = nodes.size(); + if(num_nodes > firstUnusedNodeIndex) + { + if(nodes[firstUnusedNodeIndex] != nullptr) + { + #ifdef MULTITHREAD_SUPPORT + //before releasing the lock, make sure it has an allocated type, otherwise it could get grabbed by another thread + nodes[firstUnusedNodeIndex]->InitializeUnallocated(); + #endif + } + else //allocate if nullptr + nodes[firstUnusedNodeIndex] = new EvaluableNode(); + + return nodes[firstUnusedNodeIndex++]; + } + + //ran out, so need another node; push a bunch on the heap so don't need to reallocate as often and slow down garbage collection + size_t nodes_to_allocate = static_cast(allocExpansionFactor * num_nodes) + 1; //preallocate additional resources, plus current node + + //fill new EvaluableNode slots with nullptr + nodes.resize(num_nodes + nodes_to_allocate, nullptr); + + nodes[firstUnusedNodeIndex] = new EvaluableNode(); + return nodes[firstUnusedNodeIndex++]; +} + +void EvaluableNodeManager::FreeAllNodesExceptReferencedNodes() +{ + if(nodes.size() == 0) + return; + + uint8_t cur_gc_collect_iteration = 1; + + //set to contain everything that is referenced 
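+	// (editorial note: this call is the mark phase -- every node reachable from nodesCurrentlyReferenced has
+	//  its garbage collection iteration set to cur_gc_collect_iteration; the loop below is the compacting
+	//  sweep, which keeps marked nodes in the in-use region and swaps unmarked nodes above
+	//  lowest_known_unused_index so they can be reused by later allocations)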
+ SetAllReferencedNodesGCCollectIteration(cur_gc_collect_iteration); + + //start with a clean slate, and swap everything in use into the in-use region + size_t lowest_known_unused_index = firstUnusedNodeIndex; //will store any unused nodes up here; start at what was previously known to be the max, as those above don't need to be rechecked + //clear firstUnusedNodeIndex to signal to other threads that they won't need to do garbage collection + firstUnusedNodeIndex = 0; + + //create a temporary variable for multithreading as to not use the atomic variable to slow things down + size_t first_unused_node_index_temp = 0; + while(first_unused_node_index_temp < lowest_known_unused_index) + { + //nodes can't be nullptr below firstUnusedNodeIndex + auto &cur_node_ptr = nodes[first_unused_node_index_temp]; + + //if the node has been found on this iteration and set to the current iteration count, then move on + if(cur_node_ptr->GetGarbageCollectionIteration() == cur_gc_collect_iteration) + { + first_unused_node_index_temp++; + } + else //collect the node + { + //free any extra memory used, since this node is no longer needed + if(cur_node_ptr->GetType() != ENT_DEALLOCATED) + cur_node_ptr->Invalidate(); + + //see if out of things to free; if so exit early + if(lowest_known_unused_index == 0) + break; + + //put the node up at the top where unused memory resides and reduce lowest_known_unused_index + std::swap(cur_node_ptr, nodes[--lowest_known_unused_index]); + } + } + + //assign back to the atomic variable + firstUnusedNodeIndex = first_unused_node_index_temp; + + //reset garbage collection iteration as it has been counted as referenced + //set to contain everything that is referenced, which could be borrowed nodes from outside of the entity + // which is why it can't just iterate over nodes + SetAllReferencedNodesGCCollectIteration(0); + + //update details since last garbage collection + executionCyclesSinceLastGarbageCollection = 0; +} + +void EvaluableNodeManager::FreeNodeTreeRecurse(EvaluableNode *tree) +{ + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + { + if(e != nullptr) + FreeNodeTreeRecurse(e); + } + } + else + { + for(auto &e : tree->GetOrderedChildNodes()) + { + if(e != nullptr) + FreeNodeTreeRecurse(e); + } + } + + tree->Invalidate(); +} + +void EvaluableNodeManager::FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree) +{ + if(tree->IsAssociativeArray()) + { + //pull the mapped child nodes out of the tree before invalidating it + //need to invalidate before call child nodes to prevent infinite recrusion loop + EvaluableNode::AssocType mcn; + auto &tree_mcn = tree->GetMappedChildNodesReference(); + std::swap(mcn, tree_mcn); + tree->Invalidate(); + + for(auto &[_, e] : mcn) + { + if(e != nullptr && e->GetType() != ENT_DEALLOCATED) + FreeNodeTreeWithCyclesRecurse(e); + } + + //free the references + string_intern_pool.DestroyStringReferences(mcn, [](auto n) { return n.first; }); + } + else if(tree->IsImmediate()) + { + tree->Invalidate(); + } + else //ordered + { + //pull the ordered child nodes out of the tree before invalidating it + //need to invalidate before call child nodes to prevent infinite recrusion loop + std::vector ocn; + auto &tree_ocn = tree->GetOrderedChildNodesReference(); + std::swap(ocn, tree_ocn); + tree->Invalidate(); + + for(auto &e : ocn) + { + if(e != nullptr && e->GetType() != ENT_DEALLOCATED) + FreeNodeTreeWithCyclesRecurse(e); + } + } +} + +void EvaluableNodeManager::ModifyLabels(EvaluableNode *n, 
EvaluableNodeMetadataModifier metadata_modifier) +{ + size_t num_labels = n->GetNumLabels(); + if(num_labels == 0) + return; + + if(metadata_modifier == ENMM_NO_CHANGE) + return; + + if(metadata_modifier == ENMM_REMOVE_ALL) + { + n->ClearLabels(); + n->ClearComments(); + return; + } + + if(num_labels == 1) + { + std::string label_string = n->GetLabel(0); + n->ClearLabels(); + + if(metadata_modifier == ENMM_LABEL_ESCAPE_INCREMENT) + { + label_string.insert(begin(label_string), '#'); + n->AppendLabel(label_string); + } + else if(metadata_modifier == ENMM_LABEL_ESCAPE_DECREMENT) + { + //remove # in front + if(label_string.size() > 0 && label_string[0] == '#') + label_string.erase(begin(label_string)); + + n->AppendLabel(label_string); + } + } + + //remove all labels and turn into strings + auto string_labels = n->GetLabelsStrings(); + n->ClearLabels(); + + if(metadata_modifier == ENMM_LABEL_ESCAPE_INCREMENT) + { + //add # in front + for(auto &label : string_labels) + n->AppendLabel("#" + label); + } + else if(metadata_modifier == ENMM_LABEL_ESCAPE_DECREMENT) + { + //remove # in front + for(auto &label : string_labels) + { + if(label.size() > 0 && label[0] == '#') + label = label.substr(1); + + n->AppendLabel(label); + } + } +} + +void EvaluableNodeManager::KeepNodeReference(EvaluableNode *en) +{ + if(en == nullptr) + return; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); +#endif + + //attempt to put in value 1 for the reference + auto [inserted_entry, inserted] = nodesCurrentlyReferenced.insert(std::make_pair(en, 1)); + + //if couldn't insert because already referenced, then increment + if(!inserted) + inserted_entry->second++; +} + +void EvaluableNodeManager::FreeNodeReference(EvaluableNode *en) +{ + if(en == nullptr) + return; + +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); +#endif + + //get reference count + auto node = nodesCurrentlyReferenced.find(en); + + //don't do anything if not counted + if(node == nodesCurrentlyReferenced.end()) + return; + + //if it has sufficient refcount, then just decrement + if(node->second > 1) + node->second--; + else //otherwise remove reference + nodesCurrentlyReferenced.erase(node); +} + +void EvaluableNodeManager::CompactAllocatedNodes() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock write_lock(managerAttributesMutex); +#endif + + size_t lowest_known_unused_index = firstUnusedNodeIndex; //store any unused nodes here + + //start with a clean slate, and swap everything in use into the in-use region + firstUnusedNodeIndex = 0; + + //just in case empty + if(nodes.size() == 0) + return; + + while(firstUnusedNodeIndex < lowest_known_unused_index) + { + if(nodes[firstUnusedNodeIndex] != nullptr && nodes[firstUnusedNodeIndex]->GetType() != ENT_DEALLOCATED) + firstUnusedNodeIndex++; + else + { + //see if out of things to free; if so exit early + if(lowest_known_unused_index == 0) + break; + + //put the node up at the edge of unused memory, grab the next lowest node and pull it down to increase density + std::swap(nodes[firstUnusedNodeIndex], nodes[lowest_known_unused_index - 1]); + lowest_known_unused_index--; + } + } +} + +size_t EvaluableNodeManager::GetEstimatedTotalReservedSizeInBytes() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(managerAttributesMutex); +#endif + + size_t total_size = 0; + for(auto &a : nodes) + total_size += EvaluableNode::GetEstimatedNodeSizeInBytes(a); + + return total_size; +} + +size_t 
EvaluableNodeManager::GetEstimatedTotalUsedSizeInBytes() +{ +#ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(managerAttributesMutex); +#endif + + size_t total_size = 0; + for(size_t i = 0; i < firstUnusedNodeIndex; i++) + total_size += EvaluableNode::GetEstimatedNodeSizeInBytes(nodes[i]); + + return total_size; +} + +void EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(EvaluableNode *en) +{ + if(en == nullptr) + return; + + static EvaluableNode::ReferenceSetType checked; + checked.clear(); + return ValidateEvaluableNodeTreeMemoryIntegrityRecurse(en, checked); +} + +std::pair EvaluableNodeManager::DeepAllocCopy(EvaluableNode *tree, DeepAllocCopyParams &dacp) +{ + //attempt to insert a new reference for this node, start with null + auto [inserted_copy, inserted] = dacp.references->insert(std::make_pair(tree, nullptr)); + + //can't insert, so already have a copy + // need to indicate that it has a cycle + if(!inserted) + return std::make_pair(inserted_copy->second, true); + + EvaluableNode *copy = AllocNode(tree, dacp.labelModifier); + + //shouldn't happen, but just to be safe + if(copy == nullptr) + return std::make_pair(nullptr, false); + + //start without needing a cycle check in case it can be cleared + copy->SetNeedCycleCheck(false); + + //write the value to the iterator from the earlier insert + inserted_copy->second = copy; + + //copy and update any child nodes + if(copy->IsAssociativeArray()) + { + auto ©_mcn = copy->GetMappedChildNodesReference(); + for(auto &[_, s] : copy_mcn) + { + //get current item in list + EvaluableNode *n = s; + if(n == nullptr) + continue; + + //make copy; if need cycle check, then mark it on the parent copy + auto [child_copy, need_cycle_check] = DeepAllocCopy(n, dacp); + if(need_cycle_check) + copy->SetNeedCycleCheck(true); + + //replace item in assoc with copy + s = child_copy; + } + } + else + { + auto ©_ocn = copy->GetOrderedChildNodes(); + for(size_t i = 0; i < copy_ocn.size(); i++) + { + //get current item in list + EvaluableNode *n = copy_ocn[i]; + if(n == nullptr) + continue; + + //make copy; if need cycle check, then mark it on the parent copy + auto [child_copy, need_cycle_check] = DeepAllocCopy(n, dacp); + if(need_cycle_check) + copy->SetNeedCycleCheck(true); + + //replace current item in list with copy + copy_ocn[i] = child_copy; + } + } + + return std::make_pair(copy, copy->GetNeedCycleCheck()); +} + +#ifdef _OPENMP +EvaluableNode *EvaluableNodeManager::NonCycleDeepAllocCopy(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier, bool parallelize) +#else +EvaluableNode *EvaluableNodeManager::NonCycleDeepAllocCopy(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier) +#endif +{ + EvaluableNode *copy = nullptr; + #pragma omp critical + { + copy = AllocNode(tree, metadata_modifier); + } + + if(copy->IsAssociativeArray()) + { + //for any mapped children, copy and update + for(auto &[_, s] : copy->GetMappedChildNodesReference()) + { + //get current item in list + EvaluableNode *n = s; + if(n == nullptr) + continue; + + //replace item in list with copy + #ifdef _OPENMP + s = NonCycleDeepAllocCopy(n, metadata_modifier, parallelize); + #else + s = NonCycleDeepAllocCopy(n, metadata_modifier); + #endif + } + } + else if(!copy->IsImmediate()) + { + //for any ordered children, copy and update + auto ©_ocn = copy->GetOrderedChildNodesReference(); + + #pragma omp parallel for schedule(static) if(parallelize && copy->GetOrderedChildNodes().size() > 16) + for(int64_t i = 0; i < static_cast(copy_ocn.size()); 
i++) + { + //get current item in list + EvaluableNode *n = copy_ocn[i]; + if(n == nullptr) + continue; + + //replace current item in list with copy + #ifdef _OPENMP + copy_ocn[i] = NonCycleDeepAllocCopy(n, metadata_modifier, parallelize); + #else + copy_ocn[i] = NonCycleDeepAllocCopy(n, metadata_modifier); + #endif + } + } + + return copy; +} + +void EvaluableNodeManager::ModifyLabelsForNodeTree(EvaluableNode *tree, EvaluableNode::ReferenceSetType &checked, EvaluableNodeMetadataModifier metadata_modifier) +{ + //attempt to insert; if new, mark as not needing a cycle check yet + // though that may be changed when child nodes are evaluated below + auto [_, inserted] = checked.insert(tree); + if(inserted) + tree->SetNeedCycleCheck(false); + else //already exists, nothing to do + return; + + ModifyLabels(tree, metadata_modifier); + + if(tree->IsAssociativeArray()) + { + for(auto &[cn_id, cn] : tree->GetMappedChildNodesReference()) + { + if(cn == nullptr) + continue; + + ModifyLabelsForNodeTree(cn, checked, metadata_modifier); + } + } + else if(!tree->IsImmediate()) + { + for(auto cn : tree->GetOrderedChildNodesReference()) + { + if(cn == nullptr) + continue; + + ModifyLabelsForNodeTree(cn, checked, metadata_modifier); + } + } +} + +void EvaluableNodeManager::NonCycleModifyLabelsForNodeTree(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier) +{ + ModifyLabels(tree, metadata_modifier); + + if(tree->IsAssociativeArray()) + { + for(auto &[_, cn] : tree->GetMappedChildNodesReference()) + { + if(cn == nullptr) + continue; + + NonCycleModifyLabelsForNodeTree(cn, metadata_modifier); + } + } + else if(!tree->IsImmediate()) + { + for(auto cn : tree->GetOrderedChildNodesReference()) + { + if(cn == nullptr) + continue; + + NonCycleModifyLabelsForNodeTree(cn, metadata_modifier); + } + } +} + +std::pair EvaluableNodeManager::UpdateFlagsForNodeTreeRecurse(EvaluableNode *tree, EvaluableNode::ReferenceSetType &checked) +{ + //attempt to insert; if new, mark as not needing a cycle check yet + // though that may be changed when child nodes are evaluated below + auto [_, inserted] = checked.insert(tree); + if(inserted) + tree->SetNeedCycleCheck(false); + else //already exists, notify caller + return std::make_pair(true, tree->GetIsIdempotent()); + + bool is_idempotent = (IsEvaluableNodeTypePotentiallyIdempotent(tree->GetType()) && (tree->GetNumLabels() == 0)); + + if(tree->IsAssociativeArray()) + { + bool need_cycle_check = false; + + for(auto &[cn_id, cn] : tree->GetMappedChildNodesReference()) + { + if(cn == nullptr) + continue; + + auto [cn_need_cycle_check, cn_is_idempotent] = UpdateFlagsForNodeTreeRecurse(cn, checked); + + //update flags for tree + if(cn_need_cycle_check) + need_cycle_check = true; + + if(!cn_is_idempotent) + is_idempotent = false; + } + + tree->SetNeedCycleCheck(need_cycle_check); + tree->SetIsIdempotent(is_idempotent); + return std::make_pair(need_cycle_check, is_idempotent); + } + else if(!tree->IsImmediate()) + { + bool need_cycle_check = false; + + for(auto cn : tree->GetOrderedChildNodesReference()) + { + if(cn == nullptr) + continue; + + auto [cn_need_cycle_check, cn_is_idempotent] = UpdateFlagsForNodeTreeRecurse(cn, checked); + + //update flags for tree + if(cn_need_cycle_check) + need_cycle_check = true; + + if(!cn_is_idempotent) + is_idempotent = false; + } + + tree->SetNeedCycleCheck(need_cycle_check); + tree->SetIsIdempotent(is_idempotent); + return std::make_pair(need_cycle_check, is_idempotent); + } + else //immediate value + { + 
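+		//editorial note: an immediate node has no child nodes, so it cannot participate in a cycle;
+		//its idempotency was already computed above from its type and label count (per that condition,
+		//any node carrying a label is treated as non-idempotent)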
tree->SetIsIdempotent(is_idempotent); + tree->SetNeedCycleCheck(false); + return std::make_pair(false, is_idempotent); + } +} + +void EvaluableNodeManager::SetAllReferencedNodesGCCollectIterationRecurse(EvaluableNode *tree, uint8_t gc_collect_iteration) +{ + //if entering this function, then the node hasn't been marked yet + tree->SetGarbageCollectionIteration(gc_collect_iteration); + + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + { + if(e == nullptr || e->GetGarbageCollectionIteration() == gc_collect_iteration) + continue; + + SetAllReferencedNodesGCCollectIterationRecurse(e, gc_collect_iteration); + } + } + else if(!tree->IsImmediate()) + { + for(auto &e : tree->GetOrderedChildNodesReference()) + { + if(e == nullptr || e->GetGarbageCollectionIteration() == gc_collect_iteration) + continue; + + SetAllReferencedNodesGCCollectIterationRecurse(e, gc_collect_iteration); + } + } +} + +void EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrityRecurse(EvaluableNode *en, EvaluableNode::ReferenceSetType &checked) +{ + auto [_, inserted] = checked.insert(en); + if(!inserted) + return; + + if(en->GetType() == ENT_DEALLOCATED) + assert(false); + + if(en->IsAssociativeArray()) + { + for(auto &[cn_id, cn] : en->GetMappedChildNodes()) + { + if(cn == nullptr) + continue; + + ValidateEvaluableNodeTreeMemoryIntegrityRecurse(cn, checked); + } + } + else if(!en->IsImmediate()) + { + for(auto cn : en->GetOrderedChildNodesReference()) + { + if(cn == nullptr) + continue; + + ValidateEvaluableNodeTreeMemoryIntegrityRecurse(cn, checked); + } + } +} diff --git a/src/Amalgam/evaluablenode/EvaluableNodeManagement.h b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h new file mode 100644 index 00000000..36198f73 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeManagement.h @@ -0,0 +1,658 @@ +#pragma once + +//project headers: +#include "Concurrency.h" +#include "EvaluableNode.h" + +typedef int64_t ExecutionCycleCount; +typedef int32_t ExecutionCycleCountCompactDelta; + +//describes an EvaluableNode reference and whether it is uniquely referenced +class EvaluableNodeReference +{ +public: + constexpr EvaluableNodeReference() : reference(nullptr), unique(true) + { } + + constexpr EvaluableNodeReference(EvaluableNode *_reference, bool _unique) + : reference(_reference), unique(_unique) + { } + + constexpr EvaluableNodeReference(const EvaluableNodeReference &inr) + : reference(inr.reference), unique(inr.unique) + { } + + //when attached a child node, make sure that this node reflects the same properties + void UpdatePropertiesBasedOnAttachedNode(EvaluableNodeReference &attached) + { + if(attached.reference == nullptr) + return; + + if(!attached.unique) + { + unique = false; + //if new attachments aren't unique, then can't guarantee there isn't a cycle present + reference->SetNeedCycleCheck(true); + } + else if(attached.reference->GetNeedCycleCheck()) + { + reference->SetNeedCycleCheck(true); + } + + if(!attached.reference->GetIsIdempotent()) + reference->SetIsIdempotent(false); + } + + //calls GetNeedCycleCheck if the reference is not nullptr, returns false if it is nullptr + constexpr bool GetNeedCycleCheck() + { + if(reference == nullptr) + return false; + + return reference->GetNeedCycleCheck(); + } + + //calls SetNeedCycleCheck if the reference is not nullptr + constexpr void SetNeedCycleCheck(bool need_cycle_check) + { + if(reference == nullptr) + return; + + reference->SetNeedCycleCheck(need_cycle_check); + } + + constexpr static 
EvaluableNodeReference Null() + { + return EvaluableNodeReference(nullptr, true); + } + + //allow to use as an EvaluableNode * + constexpr operator EvaluableNode *() + { return reference; } + + //allow to use as an EvaluableNode * + constexpr EvaluableNode *operator->() + { return reference; } + + EvaluableNode *reference; + + //this is the only reference to the result + bool unique; +}; + + +//Uses an EvaluableNode as a stack which may already have elements in it +// upon destruction it restores the stack back to the state it was when constructed +class EvaluableNodeStackStateSaver +{ +public: + __forceinline EvaluableNodeStackStateSaver(std::vector *_stack) + { + stack = _stack; + originalStackSize = stack->size(); + } + + //constructor that adds one first element + __forceinline EvaluableNodeStackStateSaver(std::vector *_stack, EvaluableNode *initial_element) + { + stack = _stack; + originalStackSize = stack->size(); + + stack->push_back(initial_element); + } + + __forceinline ~EvaluableNodeStackStateSaver() + { + stack->resize(originalStackSize); + } + + __forceinline void PushEvaluableNode(EvaluableNode *n) + { + stack->push_back(n); + } + + __forceinline void PopEvaluableNode() + { + stack->pop_back(); + } + + std::vector *stack; + size_t originalStackSize; +}; + + +class EvaluableNodeManager +{ +public: + EvaluableNodeManager(); + + ~EvaluableNodeManager(); + + ////////////////////////////////// + //convenience functions to alloc nodes with specific types of data + inline EvaluableNode *AllocNode(EvaluableNodeType type, const std::string &string_value) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(type, string_value); + return n; + } + + inline EvaluableNode *AllocNode(EvaluableNodeType type, StringInternPool::StringID string_id) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(type, string_id); + return n; + } + + //like AllocNode, but hands off the string reference to string_id + inline EvaluableNode *AllocNodeWithReferenceHandoff(EvaluableNodeType type, StringInternPool::StringID string_id) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeTypeWithReferenceHandoff(type, string_id); + return n; + } + + inline EvaluableNode *AllocNode(EvaluableNodeType type, StringInternRef &sir) + { return AllocNode(type, static_cast(sir)); } + inline EvaluableNode *AllocNode(EvaluableNodeType type, StringInternWeakRef &siwr) + { return AllocNode(type, static_cast(siwr)); } + + inline EvaluableNode *AllocNode(double float_value) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(float_value); + return n; + } + + inline EvaluableNode *AllocNode(int64_t int_value) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(static_cast(int_value)); + return n; + } + + inline EvaluableNode *AllocNode(EvaluableNodeType type) + { + EvaluableNode *n = AllocUninitializedNode(); + n->InitializeType(type); + return n; + } + + inline EvaluableNode *AllocListNode(std::vector *child_nodes) + { + EvaluableNode *n = AllocNode(ENT_LIST); + n->SetOrderedChildNodes(*child_nodes); + return n; + } + + //allocates and returns a node of type ENT_LIST + // and allocates num_child_nodes child nodes initialized to child_node_type (with an appropriate default value) + EvaluableNode *AllocListNodeWithOrderedChildNodes(EvaluableNodeType child_node_type, size_t num_child_nodes); + + //ways that labels can be modified when a new node is allocated + enum EvaluableNodeMetadataModifier + { + ENMM_NO_CHANGE, //leave labels as they are + 
ENMM_LABEL_ESCAPE_INCREMENT, //insert a # in front of each label + ENMM_LABEL_ESCAPE_DECREMENT, //remove a # from the front of each label + ENMM_REMOVE_ALL //remove all metadata + }; + EvaluableNode *AllocNode(EvaluableNode *original, EvaluableNodeMetadataModifier metadata_modifier = ENMM_NO_CHANGE); + + //Copies the data structure and everything underneath it, modifying labels as specified + // if cycle_free is true on input, then it can perform a faster copy + inline EvaluableNodeReference DeepAllocCopy(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier = ENMM_NO_CHANGE) + { + if(tree == nullptr) + return EvaluableNodeReference::Null(); + + if(!tree->GetNeedCycleCheck()) + return EvaluableNodeReference(NonCycleDeepAllocCopy(tree, metadata_modifier), true); + + EvaluableNode::ReferenceAssocType references; + return DeepAllocCopy(tree, references, metadata_modifier); + } + + //used to hold all of the references for DeepAllocCopy calls + struct DeepAllocCopyParams + { + constexpr DeepAllocCopyParams(EvaluableNode::ReferenceAssocType *_references, EvaluableNodeMetadataModifier metadata_modifier) + : references(_references), labelModifier(metadata_modifier) + { } + + EvaluableNode::ReferenceAssocType *references; + EvaluableNodeMetadataModifier labelModifier; + }; + + //Copies the data structure and everything underneath it, modifying labels as specified + // modifies labels as specified + // will determine whether the tree is cycle free and return the appropriate value in the EvaluableNodeReference + //references is a map of those nodes that have already been copied, with the key being the original and the value being the copy -- it first looks in references before making a copy + inline EvaluableNodeReference DeepAllocCopy(EvaluableNode *tree, EvaluableNode::ReferenceAssocType &references, EvaluableNodeMetadataModifier metadata_modifier = ENMM_NO_CHANGE) + { + if(tree == nullptr) + return EvaluableNodeReference::Null(); + + //start with cycleFree true, will be set to false if it isn't + DeepAllocCopyParams dacp(&references, metadata_modifier); + auto [copy, need_cycle_check] = DeepAllocCopy(tree, dacp); + return EvaluableNodeReference(copy, true); + } + + //modifies the labels for the tree as described by metadata_modifier + inline static void ModifyLabelsForNodeTree(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier = ENMM_NO_CHANGE) + { + if(tree == nullptr || metadata_modifier == ENMM_NO_CHANGE) + return; + + if(!tree->GetNeedCycleCheck()) + { + NonCycleModifyLabelsForNodeTree(tree, metadata_modifier); + return; + } + + EvaluableNode::ReferenceSetType checked; + ModifyLabelsForNodeTree(tree, checked, metadata_modifier); + } + + //computes whether the code is cycle free and idempotent and updates all nodes appropriately + static inline void UpdateFlagsForNodeTree(EvaluableNode *tree) + { + if(tree == nullptr) + return; + + EvaluableNode::ReferenceSetType checked; + UpdateFlagsForNodeTreeRecurse(tree, checked); + } + + //computes whether the code is cycle free and idempotent and updates all nodes appropriately + // uses checked to store nodes + static inline void UpdateFlagsForNodeTree(EvaluableNode *tree, EvaluableNode::ReferenceSetType &checked) + { + if(tree == nullptr) + return; + + checked.clear(); + UpdateFlagsForNodeTreeRecurse(tree, checked); + } + + //heuristic used to determine whether unused memory should be collected (e.g., by FreeAllNodesExcept*) + bool RecommendGarbageCollection(); + + //moves garbage collection to be more likely to be 
triggered next time CollectGarbage is called + __forceinline void AdvanceGarbageCollectionTrigger() + { + //count setting data on an entity toward trigger gc + executionCyclesSinceLastGarbageCollection += minCycleCountBetweenGarbageCollects / 4; + } + + //runs heuristics and collects garbage +#ifdef MULTITHREAD_SUPPORT + //if multithreaded, then memory_modification_lock is the lock used for memoryModificationMutex if not nullptr + void CollectGarbage(Concurrency::ReadLock *memory_modification_lock); +#else + void CollectGarbage(); +#endif + + //frees an EvaluableNode (must be owned by this EvaluableNodeManager) + inline void FreeNode(EvaluableNode *n) + { + if(n == nullptr) + return; + + n->Invalidate(); + + ReclaimFreedNodesAtEnd(); + } + + //attempts to free the node reference + __forceinline void FreeNodeIfPossible(EvaluableNodeReference &enr) + { + if(enr.unique) + FreeNode(enr); + } + + //frees all nodes + void FreeAllNodes(); + + //frees the entire tree in the respective ways for the corresponding permanance types allowed + inline void FreeNodeTree(EvaluableNode *en) + { + if(en == nullptr) + return; + + if(IsEvaluableNodeTypeImmediate(en->GetType())) + { + en->Invalidate(); + } + else if(!en->GetNeedCycleCheck()) + { + FreeNodeTreeRecurse(en); + } + else //more costly cyclic free + { + #ifdef MULTITHREAD_SUPPORT + //need to acquire a read lock, because if any node is reclaimed or compacted while this free is taking place, + // and another thread allocates it, then this cyclic free could accidentally free a node that was freed and + // reclaimed by another thread + Concurrency::ReadLock lock(managerAttributesMutex); + #endif + FreeNodeTreeWithCyclesRecurse(en); + } + + ReclaimFreedNodesAtEnd(); + } + + //attempts to free the node reference + __forceinline void FreeNodeTreeIfPossible(EvaluableNodeReference &enr) + { + if(enr.unique) + FreeNodeTree(enr); + } + + //just frees the child nodes of tree, but not tree itself; assumes no cycles + inline void FreeNodeChildNodes(EvaluableNode *tree) + { + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + { + if(e != nullptr) + FreeNodeTreeRecurse(e); + } + } + else if(tree->IsOrderedArray()) + { + for(auto &e : tree->GetOrderedChildNodesReference()) + { + if(e != nullptr) + FreeNodeTreeRecurse(e); + } + } + + ReclaimFreedNodesAtEnd(); + } + + //if no nodes are referenced, then will free all + inline void ClearAllNodesIfNoneReferenced() + { + if(nodesCurrentlyReferenced.size() == 0) + FreeAllNodes(); + } + + //adds the node to nodesCurrentlyReferenced + void KeepNodeReference(EvaluableNode *en); + + //like KeepNodeReference but iterates over a collection + template + inline void KeepNodeReferences(EvaluableNodeCollection &node_collection) + { + #ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); + #endif + + for(auto en : node_collection) + { + if(en == nullptr) + continue; + + //attempt to put in value 1 for the reference + auto [inserted_result, inserted] = nodesCurrentlyReferenced.insert(std::make_pair(en, 1)); + + //if couldn't insert because already referenced, then increment + if(!inserted) + inserted_result->second++; + } + } + + //removes the node from nodesCurrentlyReferenced + void FreeNodeReference(EvaluableNode *en); + + //like FreeNodeReference but iterates over a collection + template + inline void FreeNodeReferences(EvaluableNodeCollection &node_collection) + { + #ifdef MULTITHREAD_SUPPORT + Concurrency::WriteLock lock(managerAttributesMutex); + #endif + + 
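+		//editorial note: this is the batched counterpart of FreeNodeReference -- one write lock covers the
+		//whole collection while each entry's reference count is decremented (or erased at zero) below;
+		//hypothetical pairing, for illustration only (variable names are not from the sources):
+		//    enm.KeepNodeReferences(nodes_in_use);   //...work with the nodes...
+		//    enm.FreeNodeReferences(nodes_in_use);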
for(auto en : node_collection) + { + if(en == nullptr) + continue; + + //get reference count + auto node = nodesCurrentlyReferenced.find(en); + + //don't do anything if not counted + if(node == nodesCurrentlyReferenced.end()) + continue; + + //if it has sufficient refcount, then just decrement + if(node->second > 1) + node->second--; + else //otherwise remove reference + nodesCurrentlyReferenced.erase(node); + } + } + + //compacts allocated nodes so that the node pool can be used more efficiently + // and can improve reuse without calling the more expensive FreeAllNodesExceptReferencedNodes + void CompactAllocatedNodes(); + + //allows freed nodes at the end of nodes to be reallocated + inline void ReclaimFreedNodesAtEnd() + { + #ifdef MULTITHREAD_SUPPORT + //this is much more expensive with multithreading, so only do when useful + if((executionCyclesSinceLastGarbageCollection & 511) != 0) + return; + + //be opportunistic and only attempt to reclaim if it can grab a write lock + Concurrency::WriteLock write_lock(managerAttributesMutex, std::defer_lock); + if(!write_lock.try_lock()) + return; + #endif + + //if any group of nodes on the top are ready to be cleaned up cheaply, do so + while(firstUnusedNodeIndex > 0 && nodes[firstUnusedNodeIndex - 1] != nullptr && nodes[firstUnusedNodeIndex - 1]->GetType() == ENT_DEALLOCATED) + firstUnusedNodeIndex--; + } + + //returns the number of nodes currently being used that have not been freed yet + __forceinline size_t GetNumberOfUsedNodes() + { return firstUnusedNodeIndex; } + + __forceinline size_t GetNumberOfUnusedNodes() + { return nodes.size() - firstUnusedNodeIndex; } + + __forceinline size_t GetNumberOfNodesReferenced() + { + return nodesCurrentlyReferenced.size(); + } + + //returns the root node, implicitly defined as the first node in memory + // note that this means there should be at least one node allocated and SetRootNode called before this function is called + inline EvaluableNode *GetRootNode() + { + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(managerAttributesMutex); + #endif + + if(firstUnusedNodeIndex == 0) + return nullptr; + + return nodes[0]; + } + + //sets the root node, implicitly defined as the first node in memory, to new_root + // note that new_root MUST have been allocated by this EvaluableNodeManager + inline void SetRootNode(EvaluableNode *new_root) + { + #ifdef MULTITHREAD_SUPPORT + //use WriteLock to be safe + Concurrency::WriteLock lock(managerAttributesMutex); + #endif + + //iteratively search forward; this will be fast for newly created entities but potentially slow for those that are not + // however, this should be rarely called on those entities since it's basically clearing them out, so it should not generally be a performance issue + auto location = std::find(begin(nodes), begin(nodes) + firstUnusedNodeIndex, new_root); + + //swap the pointers + if(location != end(nodes)) + std::swap(*begin(nodes), *location); + } + + //returns a copy of the nodes referenced; should be used only for debugging + inline EvaluableNode::ReferenceCountType &GetNodesReferenced() + { + return nodesCurrentlyReferenced; + } + + //returns true if any node is referenced other than root, which is an indication if there are + // any interpreters operating on the nodes managed by this instance + inline bool IsAnyNodeReferencedOtherThanRoot() + { + #ifdef MULTITHREAD_SUPPORT + Concurrency::ReadLock lock(managerAttributesMutex); + #endif + + size_t num_nodes_currently_referenced = nodesCurrentlyReferenced.size(); + 
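+		//editorial note: three cases follow -- more than one reference means something besides the root is
+		//held; zero references means nothing is held; exactly one reference only counts as "root only" if
+		//that single entry is actually nodes[0]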
if(num_nodes_currently_referenced > 1) + return true; + + if(num_nodes_currently_referenced == 0) + return false; + + //exactly one node referenced; if the root is null, then it has to be something else + if(nodes[0] == nullptr) + return true; + + //in theory this should always find the root node being referenced and thus return false + // but if there is any sort of unusual situation where the root node isn't referenced, it'll catch it + // and report that there is something else being referenced + return (nodesCurrentlyReferenced.find(nodes[0]) == end(nodesCurrentlyReferenced)); + } + + //Returns all nodes still in use. For debugging purposes + std::vector GetUsedNodes() + { return std::vector(begin(nodes), begin(nodes) + firstUnusedNodeIndex); } + + //returns an estimate of the amount of memory allocated by the nodes managed + // only an estimate because the platform's underlying memory management system may need to allocate additional + // memory that cannot be easily accounted for + size_t GetEstimatedTotalReservedSizeInBytes(); + size_t GetEstimatedTotalUsedSizeInBytes(); + + //makes sure that the evaluable node and everything referenced by it has not been deallocated + //asserts an error if it finds any + //intended for debugging only + static void ValidateEvaluableNodeTreeMemoryIntegrity(EvaluableNode *en); + + //total number of execution cycles since one of the FreeAllNodes* functions was called +#ifdef MULTITHREAD_SUPPORT + std::atomic executionCyclesSinceLastGarbageCollection; +#else + ExecutionCycleCount executionCyclesSinceLastGarbageCollection; +#endif + +protected: + //allocates an EvaluableNode of the respective memory type in the appropriate way + // returns an uninitialized EvaluableNode -- care must be taken to set fields properly + EvaluableNode *AllocUninitializedNode(); + + //frees everything execpt those nodes referenced by nodesCurrentlyReferenced + void FreeAllNodesExceptReferencedNodes(); + + //support for FreeNodeTree, but requires that tree not be nullptr + void FreeNodeTreeRecurse(EvaluableNode *tree); + + //support for FreeNodeTreeWithCycles, but requires that tree not be nullptr + void FreeNodeTreeWithCyclesRecurse(EvaluableNode *tree); + + //modifies the labels of n with regard to metadata_modifier + // assumes n is not nullptr + static void ModifyLabels(EvaluableNode *n, EvaluableNodeMetadataModifier metadata_modifier); + + //more efficient version of DeepAllocCopy + //returns a pair of the copy and true if the copy needs cycle check + //assumes tree is not nullptr + std::pair DeepAllocCopy(EvaluableNode *tree, DeepAllocCopyParams &dacp); + + //performs a deep copy on tree when tree is guaranteed to have no reference cycles + // assumes tree is NOT nullptr +#ifndef _OPENMP + EvaluableNode *NonCycleDeepAllocCopy(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier); +#else + //keep track of whether there's a top level parallelization + EvaluableNode *NonCycleDeepAllocCopy(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier, bool parallelize = true); +#endif + + //recursive helper function for ModifyLabelsForNodeTree + //assumes tree is not nullptr + static void ModifyLabelsForNodeTree(EvaluableNode *tree, EvaluableNode::ReferenceSetType &checked, EvaluableNodeMetadataModifier metadata_modifier = ENMM_NO_CHANGE); + + //recursive helper function for ModifyLabelsForNodeTree + //assumes tree is not nullptr + static void NonCycleModifyLabelsForNodeTree(EvaluableNode *tree, EvaluableNodeMetadataModifier metadata_modifier = 
ENMM_NO_CHANGE); + + //sets all referenced nodes' garbage collection iteration to gc_collect_iteration + inline void SetAllReferencedNodesGCCollectIteration(uint8_t gc_collect_iteration) + { + //check for null or insertion before calling recursion to minimize number of branches (slight performance improvement) + for(auto &[t, _] : nodesCurrentlyReferenced) + { + if(t == nullptr || t->GetGarbageCollectionIteration() == gc_collect_iteration) + continue; + + SetAllReferencedNodesGCCollectIterationRecurse(t, gc_collect_iteration); + } + } + + //computes whether the code is cycle free and idempotent and updates all nodes appropriately + // returns flags for whether cycle free and idempotent + // requires tree not be nullptr + static std::pair UpdateFlagsForNodeTreeRecurse(EvaluableNode *tree, EvaluableNode::ReferenceSetType &checked); + + //inserts all nodes referenced by tree into the set references + //note that tree cannot be nullptr and it should already be inserted into the references prior to calling + static void SetAllReferencedNodesGCCollectIterationRecurse(EvaluableNode *tree, uint8_t gc_collect_iteration); + + static void ValidateEvaluableNodeTreeMemoryIntegrityRecurse(EvaluableNode *en, EvaluableNode::ReferenceSetType &checked); + +#ifdef MULTITHREAD_SUPPORT +public: + //mutex to manage attributes of manager, including operations such as + // memory allocation, reference management, etc. + Concurrency::ReadWriteMutex managerAttributesMutex; + + //mutex to manage whether memory nodes managed by this manager are being modified + //concurrent modifications can occur as long as there is only one unique thread + // that has allocated the memory + //garbage collection or destruction of the manager require a unique lock + // so that the memory can be traversed + Concurrency::ReadWriteMutex memoryModificationMutex; + +protected: +#endif + + //keeps track of all of the nodes currently referenced by any resource or interpreter + EvaluableNode::ReferenceCountType nodesCurrentlyReferenced; + + //nodes that have been allocated and may be in use + // all nodes in use are below firstUnusedNodeIndex, such that all above that index are free for use + // nodes cannot be nullptr for lower indices than firstUnusedNodeIndex + std::vector nodes; + +#ifdef MULTITHREAD_SUPPORT + std::atomic firstUnusedNodeIndex; +#else + size_t firstUnusedNodeIndex; +#endif + + //extra space to allocate when allocating + static const double allocExpansionFactor; + + //minimum number of cycles between collects as to not spend too much time garbage collecting + static const ExecutionCycleCountCompactDelta minCycleCountBetweenGarbageCollects; +}; diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.cpp new file mode 100644 index 00000000..081c08fa --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.cpp @@ -0,0 +1,278 @@ +//project headers: +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" + +//system headers: +#include + +EvaluableNode *EvaluableNodeTreeDifference::NodesMergeForDifferenceMethod::MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge) +{ + EvaluableNode *result = MergeTrees(this, a, b); + + //record what was included + if(result != nullptr) + { + if(a != nullptr) + aNodesIncluded[a] = result; + if(b != nullptr) + bNodesIncluded[b] = result; + } + + return result; +} + +EvaluableNode *EvaluableNodeTreeDifference::DifferenceTrees(EvaluableNodeManager *enm, EvaluableNode 
*tree1, EvaluableNode *tree2) +{ + //find commonality + NodesMergeForDifferenceMethod mm(enm); + EvaluableNode *anded_trees = mm.MergeValues(tree1, tree2); + auto &tree1_to_merged_node = mm.GetANodesIncluded(); + auto &tree2_to_merged_node = mm.GetBNodesIncluded(); + + ////////// + //build replace code + + //update difference function to: (declare ) + EvaluableNode *difference_function = enm->AllocNode(ENT_DECLARE); + + //update difference function to: (declare (assoc _ null) ) + EvaluableNode *df_vars = enm->AllocNode(ENT_ASSOC); + df_vars->SetMappedChildNode(ENBISI__, enm->AllocNode(ENT_NULL)); + difference_function->AppendOrderedChildNode(df_vars); + + //update difference function to: (declare (assoc _ null) (replace _ ) ) + EvaluableNode *df_replace = enm->AllocNode(ENT_REPLACE); + difference_function->AppendOrderedChildNode(df_replace); + df_replace->AppendOrderedChildNode(enm->AllocNode(ENT_SYMBOL, ENBISI__)); + + ////////// + //find nodes that are mutually exclusive and create lookup tables + + //traverse tree1 looking for any references not included + // if not included, then find path to node and create set of delete instructions + std::vector tree1_top_nodes_excluded; + EvaluableNode::ReferenceAssocType tree1_to_parent_node; + FindTopNodesExcluded(tree1, tree1_to_merged_node, tree1_top_nodes_excluded, tree1_to_parent_node); + + //traverse tree2 looking for any references not included + // if not included, then find path to node and create set of insert instructions + std::vector tree2_top_nodes_excluded; + EvaluableNode::ReferenceAssocType tree2_to_parent_node; + FindTopNodesExcluded(tree2, tree2_to_merged_node, tree2_top_nodes_excluded, tree2_to_parent_node); + + EvaluableNode::ReferenceAssocType merged_references_with_parents; + FindParentReferences(anded_trees, merged_references_with_parents); + + EvaluableNode::ReferenceAssocType merged_to_tree1_node; + for(auto &[n1, n2] : tree1_to_merged_node) + merged_to_tree1_node[n2] = n1; + + EvaluableNode::ReferenceAssocType merged_to_tree2_node; + for(auto &[n1, n2] : tree2_to_merged_node) + merged_to_tree2_node[n2] = n1; + + //find unique parent nodes that need to be replaced, but keep them in order + std::vector merged_nodes_need_replacing; + for(auto &parent : tree1_top_nodes_excluded) + { + EvaluableNode *merged_parent = nullptr; + if(parent != nullptr) + merged_parent = tree1_to_merged_node[parent]; + + //don't modify the node more than once + if(std::find(begin(merged_nodes_need_replacing), end(merged_nodes_need_replacing), merged_parent) == end(merged_nodes_need_replacing)) + merged_nodes_need_replacing.push_back(merged_parent); + } + for(auto &parent : tree2_top_nodes_excluded) + { + EvaluableNode *merged_parent = nullptr; + if(parent != nullptr) + merged_parent = tree2_to_merged_node[parent]; + + //don't modify the node more than once + if(std::find(begin(merged_nodes_need_replacing), end(merged_nodes_need_replacing), merged_parent) == end(merged_nodes_need_replacing)) + merged_nodes_need_replacing.push_back(merged_parent); + } + //start from bottom of tree and work way back up to top to ensure nodes are in original order + std::reverse(begin(merged_nodes_need_replacing), end(merged_nodes_need_replacing)); + + ////////// + //perform replacements + + //for all nodes that need to be replaced, replace with tree2's version, but retrieve all relevant child nodes from the tree2 version + for(auto &node_to_replace : merged_nodes_need_replacing) + { + if(node_to_replace != nullptr) + { + EvaluableNode *path_to_replace = 
GetTraversalPathListFromAToB(enm, tree1_to_parent_node, merged_to_tree1_node[anded_trees], merged_to_tree1_node[node_to_replace]); + df_replace->AppendOrderedChildNode(path_to_replace); + } + else //pointing to top-most node, so leave list access blank + df_replace->AppendOrderedChildNode(enm->AllocNode(ENT_LIST)); + + EvaluableNode *replacement_function = enm->AllocNode(ENT_LAMBDA); + df_replace->AppendOrderedChildNode(replacement_function); + + //if node to replace is nullptr, then replace the parent object + if(node_to_replace == nullptr) + { + df_replace->AppendOrderedChildNode(tree2); + break; + } + + //make sure node replacing is actually in tree2 and find it + auto tree2_node_reference = merged_to_tree2_node.find(node_to_replace); + if(tree2_node_reference == end(merged_to_tree2_node)) + continue; + EvaluableNode *tree2_node = merged_to_tree2_node[node_to_replace]; //need to reverse look up node_to_replace to get tree2's node + if(tree2_node == nullptr) + continue; + //make a copy and make sure labels are escaped, then clear any child node lists + // which will make sure there is a lower chance of reallocation when adding child nodes + EvaluableNode *replacement = enm->AllocNode(tree2_node, EvaluableNodeManager::ENMM_LABEL_ESCAPE_INCREMENT); + replacement->ClearOrderedChildNodes(); + + //make sure it is of a data containing type, otherwise need to convert and then set_type + auto replacement_type = replacement->GetType(); + if(replacement_type == ENT_LIST || replacement_type == ENT_ASSOC) + { + replacement_function->AppendOrderedChildNode(replacement); + } + else //need to create a list and transform it into (set_type ... type) + { + replacement->SetType(ENT_LIST, enm); + EvaluableNode *set_type = enm->AllocNode(ENT_SET_TYPE); + set_type->AppendOrderedChildNode(replacement); + set_type->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, GetStringFromEvaluableNodeType(replacement_type))); + + replacement_function->AppendOrderedChildNode(set_type); + } + + //replace any ordered + for(auto &cn : tree2_node->GetOrderedChildNodes()) + { + auto merged = tree2_to_merged_node.find(cn); + if(merged == end(tree2_to_merged_node) || (cn != nullptr && cn->GetType() == ENT_SYMBOL)) + { + //use whatever was given for tree2 + replacement->AppendOrderedChildNode(cn); + } + else + { + //build (get (target_value 1) ...) + EvaluableNode *retrieval = enm->AllocNode(ENT_GET); + replacement->AppendOrderedChildNode(retrieval); + EvaluableNode *target = enm->AllocNode(ENT_TARGET_VALUE); + target->AppendOrderedChildNode(enm->AllocNode(1.0)); + retrieval->AppendOrderedChildNode(target); + + //match up to tree1 + EvaluableNode *tree1_node = merged_to_tree1_node[node_to_replace]; + EvaluableNode *tree1_cn = merged_to_tree1_node[merged->second]; + + //both nodes should exist in tree1's merged mapping; if either lookup fails, skip this child node
+ if(tree1_node == nullptr || tree1_cn == nullptr) + continue; + + //get position from tree1 + auto position_in_merged = std::find(begin(tree1_node->GetOrderedChildNodes()), end(tree1_node->GetOrderedChildNodes()), tree1_cn); + size_t index = std::distance(begin(tree1_node->GetOrderedChildNodes()), position_in_merged); + retrieval->AppendOrderedChildNode(enm->AllocNode(static_cast(index))); + } + } + + //replace any mapped + for(auto &[cn_id, cn] : tree2_node->GetMappedChildNodes()) + { + auto merged = tree2_to_merged_node.find(cn); + replacement->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, cn_id)); + if(merged == end(tree2_to_merged_node)) + { + //use whatever was given for tree2 + replacement->AppendOrderedChildNode(cn); + } + else + { + //build (get (target_value 1) ...) + EvaluableNode *retrieval = enm->AllocNode(ENT_GET); + replacement->AppendOrderedChildNode(retrieval); + EvaluableNode *target = enm->AllocNode(ENT_TARGET_VALUE); + target->AppendOrderedChildNode(enm->AllocNode(1.0)); + retrieval->AppendOrderedChildNode(target); + + retrieval->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, cn_id)); + } + } + } + + return difference_function; +} + +void EvaluableNodeTreeDifference::FindParentReferences(EvaluableNode *tree, EvaluableNode::ReferenceAssocType &references_with_parents, EvaluableNode *parent) +{ + if(tree == nullptr) + return; + + //attempt to record the reference, but if already processed, skip + if(references_with_parents.insert(std::make_pair(tree, parent)).second == false) + return; + + for(auto &cn : tree->GetOrderedChildNodes()) + FindParentReferences(cn, references_with_parents, tree); + for(auto &[_, cn] : tree->GetMappedChildNodes()) + FindParentReferences(cn, references_with_parents, tree); +} + +void EvaluableNodeTreeDifference::FindTopNodesExcluded(EvaluableNode *tree, EvaluableNode::ReferenceAssocType &nodes_included, + std::vector &top_nodes_excluded, EvaluableNode::ReferenceAssocType &references_with_parents, EvaluableNode *parent) +{ + if(tree == nullptr) + return; + + //attempt to record the reference, but if already processed, skip (also prevents infinite recursion in graph structures) + if(references_with_parents.insert(std::make_pair(tree, parent)).second == false) + return; + + //if included, traverse tree, if not, insert as top excluded node + auto included_node_found = nodes_included.find(tree); + if(included_node_found == end(nodes_included)) + top_nodes_excluded.push_back(parent); + else //node *itself* is included, now check to see if it is included with all its respective keys, and also its child nodes + { + //make sure matches; if any of the keys don't match, then it's excluded + EvaluableNode *matching = (*included_node_found).second; + if(matching == nullptr) + { + top_nodes_excluded.push_back(tree); + //can't continue because matching is null + return; + } + + auto &tree_ocn = tree->GetOrderedChildNodes(); + auto &tree_mcn = tree->GetMappedChildNodes(); + + if(matching->GetOrderedChildNodes().size() != tree_ocn.size()) + top_nodes_excluded.push_back(tree); + + auto &matching_mcn = matching->GetMappedChildNodes(); + + if(matching_mcn.size() != tree_mcn.size()) + top_nodes_excluded.push_back(tree); + + //if any missing keys then also it is excluded -- needs to be recreated + for(auto &[cn_id, _] : tree_mcn) + { + if(matching_mcn.find(cn_id) == end(matching_mcn)) + { + top_nodes_excluded.push_back(tree); + break; + } + } + + //check child nodes + for(auto &cn : tree_ocn) + FindTopNodesExcluded(cn, nodes_included, top_nodes_excluded, 
references_with_parents, tree); + for(auto &[_, cn] : tree_mcn) + FindTopNodesExcluded(cn, nodes_included, top_nodes_excluded, references_with_parents, tree); + } +} diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.h new file mode 100644 index 00000000..7aefb834 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeDifference.h @@ -0,0 +1,44 @@ +#pragma once + +//project headers: +#include "EvaluableNodeTreeManipulation.h" + +class EvaluableNodeTreeDifference : public EvaluableNodeTreeManipulation +{ +public: + + //functionality to merge two nodes + class NodesMergeForDifferenceMethod : public NodesMergeMethod + { + public: + NodesMergeForDifferenceMethod(EvaluableNodeManager *_enm) + : NodesMergeMethod(_enm, false, true) + { } + + virtual EvaluableNode *MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge = false); + + constexpr EvaluableNode::ReferenceAssocType &GetANodesIncluded() + { return aNodesIncluded; } + constexpr EvaluableNode::ReferenceAssocType &GetBNodesIncluded() + { return bNodesIncluded; } + + protected: + //key is the node from tree a or b, value is the node from the merged tree + EvaluableNode::ReferenceAssocType aNodesIncluded; + EvaluableNode::ReferenceAssocType bNodesIncluded; + }; + + //returns code that will transform tree1 into tree2, using allocations from enm + static EvaluableNode *DifferenceTrees(EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2); + +protected: + + //given a set of nodes, will traverse and populate each with a reference to its parent, in traversal order + static void FindParentReferences(EvaluableNode *tree, EvaluableNode::ReferenceAssocType &references_with_parents, EvaluableNode *parent = nullptr); + + //given a set of nodes to be included (nodes_included, with the values being their matching original tree counterparts), + // will traverse tree to find the topmost nodes excluded (top_nodes_excluded, with the values being their matching original tree counterparts) which is the parent of all of the subtrees that will be excluded + // adds any nodes encountered to references_with_parets, as to be used for finding the paths to any of the nodes for creation and deletion + static void FindTopNodesExcluded(EvaluableNode *tree, EvaluableNode::ReferenceAssocType &nodes_included, + std::vector &top_nodes_excluded, EvaluableNode::ReferenceAssocType &references_with_parents, EvaluableNode *parent = nullptr); +}; diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp new file mode 100644 index 00000000..5998e5c0 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.cpp @@ -0,0 +1,700 @@ +//project headers: +#include "EvaluableNodeTreeFunctions.h" +#include "FastMath.h" +#include "Interpreter.h" + +//system headers: +#include +#include + +bool CustomEvaluableNodeComparator::operator()(EvaluableNode *a, EvaluableNode *b) +{ + //create context with "a" and "b" variables + interpreter->PushNewConstructionContext(nullptr, targetList, EvaluableNodeImmediateValueWithType(), a); + interpreter->PushNewConstructionContext(nullptr, targetList, EvaluableNodeImmediateValueWithType(), b); + + //compare + bool retval = (interpreter->InterpretNodeIntoNumberValue(function) > 0); + + interpreter->PopConstructionContext(); + interpreter->PopConstructionContext(); + + return retval; +} + +//performs a top-down stable merge on the sub-lists from start_index to 
middle_index and middle_index to end_index from source into destination using cenc +void CustomEvaluableNodeOrderedChildNodesTopDownMerge(std::vector &source, size_t start_index, size_t middle_index, size_t end_index, std::vector &destination, CustomEvaluableNodeComparator &cenc) +{ + size_t left_pos = start_index; + size_t right_pos = middle_index; + + //for all elements, pull from the appropriate buffer (left or right) + for(size_t cur_index = start_index; cur_index < end_index; cur_index++) + { + //if left_pos has elements left and is less than the right, use it + if(left_pos < middle_index && (right_pos >= end_index || cenc(source[left_pos], source[right_pos]))) + { + destination[cur_index] = source[left_pos]; + left_pos++; + } + else //the right is less, use that + { + destination[cur_index] = source[right_pos]; + right_pos++; + } + } +} + +//performs a stable merge sort of source (which *will* be modified and is not constant) from start_index to end_index into destination; uses cenc for comparison +void CustomEvaluableNodeOrderedChildNodesSort(std::vector &source, size_t start_index, size_t end_index, std::vector &destination, CustomEvaluableNodeComparator &cenc) +{ + //if one element, then sorted + if(start_index + 1 >= end_index) + return; + + size_t middle_index = (start_index + end_index) / 2; + + //sort left into list + CustomEvaluableNodeOrderedChildNodesSort(destination, start_index, middle_index, source, cenc); + //sort right into list + CustomEvaluableNodeOrderedChildNodesSort(destination, middle_index, end_index, source, cenc); + + //merge buffers back into buffer + CustomEvaluableNodeOrderedChildNodesTopDownMerge(source, start_index, middle_index, end_index, destination, cenc); +} + +std::vector CustomEvaluableNodeOrderedChildNodesSort(std::vector &list, CustomEvaluableNodeComparator &cenc) +{ + //must make two copies of the list to edit, because the sort switches back and forth between them and there is a chance that an element may be invalid + // in either list. Therefore, can't use the original list in the off chance that something is garbage collected + std::vector list_copy_1(list); + std::vector list_copy_2(list); + CustomEvaluableNodeOrderedChildNodesSort(list_copy_1, 0, list.size(), list_copy_2, cenc); + return list_copy_2; +} + +//compares right-aligned numbers in a string. searches for first digit that isn't equal, +// figures out which one is greater, and remembers it.
then it sees which number string is longer +// if the number strings are the same length, then go with whichever was remembered to be bigger +// both indices will be updated along the way +int CompareNumberInStringRightJustified(const std::string &a, const std::string &b, size_t &a_index, size_t &b_index) +{ + //comparison result of first non-matching digit + int compare_val_if_same_length = 0; + + while(1) + { + unsigned char a_value; + unsigned char b_value; + + //treat as if zero terminated strings + if(a_index < a.size()) + a_value = a[a_index]; + else + a_value = '\0'; + + if(b_index < b.size()) + b_value = b[b_index]; + else + b_value = '\0'; + + if(!std::isdigit(a_value) && !std::isdigit(b_value)) + return compare_val_if_same_length; + if(!std::isdigit(a_value)) + return -1; + if(!std::isdigit(b_value)) + return +1; + + //see if found first nonmatching digit + if(a_value < b_value) + { + if(compare_val_if_same_length == 0) + compare_val_if_same_length = -1; + } + else if(a_value > b_value) + { + if(compare_val_if_same_length == 0) + compare_val_if_same_length = +1; + } + + a_index++; + b_index++; + } + + //can't make it here + return 0; +} + + +//compares left-aligned numbers in a string until a difference is found, then uses that for comparison +// starts at the specified indicies +// both indices will be updated along the way +int CompareNumberInStringLeftJustified(const std::string &a, const std::string &b, size_t &a_index, size_t &b_index) +{ + while(1) + { + unsigned char a_value; + unsigned char b_value; + + //treat as if zero terminated strings + if(a_index < a.size()) + a_value = a[a_index]; + else + a_value = '\0'; + + if(b_index < b.size()) + b_value = b[b_index]; + else + b_value = '\0'; + + //if out of digits, then they're equal + if(!std::isdigit(a_value) && !std::isdigit(b_value)) + return 0; + + //if one ran out of digits, then it's less + if(!std::isdigit(a_value)) + return -1; + if(!std::isdigit(b_value)) + return +1; + + //compare values + if(a_value < b_value) + return -1; + if(a_value > b_value) + return +1; + + a_index++; + b_index++; + } + + //can't get here + return 0; +} + +//compares two strings "naturally" as applicable, ignoring spaces and treating numbers how a person would +// however, if the strings are "identical" via natural comparison, then it falls back to regular string comparison to ensure +// that strings are always ordered the same way +int StringNaturalCompare(const std::string &a, const std::string &b) +{ + size_t a_index = 0, b_index = 0; + + while(1) + { + unsigned char a_value; + unsigned char b_value; + + //skip over spaces + while(a_index < a.size() && std::isspace(static_cast(a[a_index]))) + a_index++; + //treat as if zero terminated string + if(a_index < a.size()) + a_value = a[a_index]; + else + a_value = '\0'; + + //skip over spaces + while(b_index < b.size() && std::isspace(static_cast(b[b_index]))) + b_index++; + if(b_index < b.size()) + b_value = b[b_index]; + else + b_value = '\0'; + + //check for group of digits + if(std::isdigit(a_value) && std::isdigit(static_cast(b_value))) + { + int result; + //if starts with leading zeros, then do a comparison from the left, otherwise from the right + if(a_value == '0' || b_value == '0') + result = CompareNumberInStringLeftJustified(a, b, a_index, b_index); + else + result = CompareNumberInStringRightJustified(a, b, a_index, b_index); + + if(result != 0) + return result; + + //if made it here, then the numbers were equal; move on to the next character + continue; + } + + //if strings are 
identical from a natural sorting perspective, then use regular compare to make sure order consistency is preserved + if(a_value == '\0' && b_value == '\0') + return a.compare(b); + + if(a_value < b_value) + return -1; + + if(a_value > b_value) + return +1; + + a_index++; + b_index++; + } + + return 0; +} + +void TraverseToEntityViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path, Entity *&relative_entity_parent, StringInternRef &id, Entity *&relative_entity) +{ + relative_entity_parent = nullptr; + id = StringInternPool::NOT_A_STRING_ID; + relative_entity = nullptr; + + if(container == nullptr) + return; + + if(EvaluableNode::IsEmptyNode(id_path)) + { + relative_entity = container; + return; + } + + if(id_path->GetOrderedChildNodes().size() == 0) + { + id.SetIDWithReferenceHandoff(EvaluableNode::ToStringIDWithReference(id_path)); + relative_entity = container->GetContainedEntity(id); + relative_entity_parent = container; + return; + } + + relative_entity_parent = container; + relative_entity = container; + for(auto &cn : id_path->GetOrderedChildNodes()) + { + relative_entity_parent = relative_entity; + //if id_path is going past the end of what exists, then it is invalid + if(relative_entity_parent == nullptr) + { + relative_entity = nullptr; + return; + } + + id.SetIDWithReferenceHandoff(EvaluableNode::ToStringIDWithReference(cn)); + relative_entity = relative_entity_parent->GetContainedEntity(id); + } +} + +Entity *TraverseToExistingEntityViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path) +{ + if(container == nullptr) + return nullptr; + + if(EvaluableNode::IsEmptyNode(id_path)) + return container; + + if(id_path->GetOrderedChildNodes().size() == 0) + { + //if the string doesn't exist, then there can't be an entity with that name + StringInternPool::StringID sid = EvaluableNode::ToStringIDIfExists(id_path); + return container->GetContainedEntity(sid); + } + + Entity *relative_entity = container; + for(auto &cn : id_path->GetOrderedChildNodes()) + { + //if the string doesn't exist, then there can't be an entity with that name + StringInternPool::StringID sid = EvaluableNode::ToStringIDIfExists(cn); + relative_entity = relative_entity->GetContainedEntity(sid); + if(relative_entity == nullptr) + return nullptr; + } + + return relative_entity; +} + +void TraverseEntityToNewDestinationViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path, Entity *&destination_entity_parent, StringInternRef &destination_id) +{ + Entity *destination_entity = nullptr; + TraverseToEntityViaEvaluableNodeIDPath(container, id_path, destination_entity_parent, destination_id, destination_entity); + + //if it already exists, then place inside it + if(destination_entity != nullptr) + { + destination_entity_parent = destination_entity; + destination_entity = nullptr; + + destination_id = StringInternRef::EmptyString(); + } + + //if couldn't get the parent, just use the original container + if(destination_entity_parent == nullptr && destination_id == StringInternPool::NOT_A_STRING_ID) + destination_entity_parent = container; +} + +EvaluableNode *GetTraversalIDPathListFromAToB(EvaluableNodeManager *enm, Entity *a, Entity *b) +{ + //create list to address entity + EvaluableNode *id_list = enm->AllocNode(ENT_LIST); + auto &ocn = id_list->GetOrderedChildNodes(); + while(b != nullptr && b != a) + { + ocn.push_back(enm->AllocNode(ENT_STRING, b->GetIdStringId())); + b = b->GetContainer(); + } + + std::reverse(begin(ocn), end(ocn)); + return id_list; +} + +EvaluableNode 
*GetTraversalPathListFromAToB(EvaluableNodeManager *enm, EvaluableNode::ReferenceAssocType &node_parents, EvaluableNode *a, EvaluableNode *b) +{ + if(a == nullptr || b == nullptr) + return nullptr; + + EvaluableNode *path_list = enm->AllocNode(ENT_LIST); + + //find a path from b back to a by way of parents + EvaluableNode::ReferenceSetType nodes_visited; + EvaluableNode *b_ancestor = b; + EvaluableNode *b_ancestor_parent = node_parents[b_ancestor]; + + while(b_ancestor_parent != nullptr + && b_ancestor != a //stop if it's the target + && nodes_visited.insert(b_ancestor_parent).second == true) //make sure not visited yet + { + + //find where the node matches + if(b_ancestor_parent->IsAssociativeArray()) + { + //look up which key corresponds to the value + StringInternPool::StringID key_sid = StringInternPool::NOT_A_STRING_ID; + for(auto &[s_id, s] : b_ancestor_parent->GetMappedChildNodesReference()) + { + if(s == b_ancestor) + { + key_sid = s_id; + break; + } + } + + path_list->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, key_sid)); + } + else if(b_ancestor_parent->IsOrderedArray()) + { + auto &b_ancestor_parent_ocn = b_ancestor_parent->GetOrderedChildNodesReference(); + const auto &found = std::find(begin(b_ancestor_parent_ocn), end(b_ancestor_parent_ocn), b_ancestor); + auto index = std::distance(begin(b_ancestor_parent_ocn), found); + path_list->AppendOrderedChildNode(enm->AllocNode(static_cast(index))); + } + else //didn't work... odd/error condition + { + enm->FreeNodeTree(path_list); + return nullptr; + } + + b_ancestor = b_ancestor_parent; + b_ancestor_parent = node_parents[b_ancestor]; + } + + //if didn't end up hitting our target, then we can't get there + if(b_ancestor != a) + { + enm->FreeNodeTree(path_list); + return nullptr; + } + + //reverse because assembled in reverse order + auto &ocn = path_list->GetOrderedChildNodes(); + std::reverse(begin(ocn), end(ocn)); + return path_list; +} + +EvaluableNode **GetRelativeEvaluableNodeFromTraversalPathList(EvaluableNode **source, EvaluableNode **index_path_nodes, size_t num_index_path_nodes, EvaluableNodeManager *enm, size_t max_num_nodes) +{ + //walk through address list to find target + EvaluableNode **destination = source; + for(size_t i = 0; i < num_index_path_nodes; i++) + { + //make sure valid and traversible, since at least one more address will be dereferenced + if(destination == nullptr) + break; + + //fetch the new destination based on what is being fetched + EvaluableNode *addr = index_path_nodes[i]; + bool addr_empty = EvaluableNode::IsEmptyNode(addr); + + //if out of nodes but need to traverse further in the index, then will need to create new nodes + if((*destination) == nullptr) + { + if(enm == nullptr) + { + destination = nullptr; + break; + } + + //need to create a new node to fill in, but create the most generic type possible that uses the type of the index as the way to access it + if(!addr_empty && DoesEvaluableNodeTypeUseNumberData(addr->GetType())) //used to access lists + *destination = enm->AllocNode(ENT_LIST); + else + *destination = enm->AllocNode(ENT_ASSOC); + } + + if(EvaluableNode::IsAssociativeArray(*destination)) + { + auto &mcn = (*destination)->GetMappedChildNodesReference(); + + if(enm == nullptr) + { + auto key_sid = StringInternPool::NOT_A_STRING_ID; + if(!addr_empty) + { + //string must already exist if can't create anything + key_sid = EvaluableNode::ToStringIDIfExists(addr); + if(key_sid == StringInternPool::NOT_A_STRING_ID) + { + destination = nullptr; + break; + } + } + + //try to find key 
+ auto found = mcn.find(key_sid); + if(found == end(mcn)) + { + destination = nullptr; + break; + } + + destination = &(found->second); + } + else //create entry if it doesn't exist + { + auto key_sid = EvaluableNode::ToStringIDWithReference(addr); + + //attempt to insert the new key + auto [inserted_key, inserted] = mcn.insert(std::make_pair(key_sid, nullptr)); + + //if not inserted, then destroy the reference + if(!inserted) + string_intern_pool.DestroyStringReference(key_sid); + + //regardless of whether or not the result was inserted, grab the value portion + destination = &(inserted_key->second); + } + } + else if(!addr_empty && EvaluableNode::IsOrderedArray(*destination)) + { + auto &ocn = (*destination)->GetOrderedChildNodesReference(); + double index = EvaluableNode::ToNumber(addr); + //if negative, start from end and wrap around if the negative index is larger than the size + if(index < 0) + { + index += ocn.size(); + if(index < 0) //clamp at zero + index = 0; + } + + //treat NaNs as 0 + if(FastIsNaN(index)) + index = 0; + + //make sure within bounds + if(index < ocn.size()) + destination = &(ocn[static_cast(index)]); + else //beyond index + { + if(enm == nullptr) + destination = nullptr; + else //resize to fit + { + //if the index is more than can be referenced in 53 bits of 64-bit float mantissa, + // then can't deal with it + if(index >= 9007199254740992) + { + destination = nullptr; + break; + } + + //find the index and validate it + size_t new_index = static_cast(index); + //if have specified a maximum number of nodes (not zero), then abide by it + if(max_num_nodes > 0 && new_index > max_num_nodes) + { + destination = nullptr; + break; + } + + ocn.resize(new_index + 1, nullptr); + destination = &(ocn[new_index]); + } + } + } + else //an immediate value -- can't get anything on the immediate + { + destination = nullptr; + } + } + + return destination; +} + +EvaluableNodeReference AccumulateEvaluableNodeIntoEvaluableNode(EvaluableNodeReference value_destination_node, EvaluableNodeReference variable_value_node, EvaluableNodeManager *enm) +{ + //if the destination is empty, then just use the value specified + if(value_destination_node.reference == nullptr) + return variable_value_node; + + //if the value is unique, then can just edit in place + if(value_destination_node.unique) + { + if(EvaluableNode::CanRepresentValueAsANumber(value_destination_node) && !EvaluableNode::IsNaN(value_destination_node)) + { + double cur_value = EvaluableNode::ToNumber(value_destination_node); + double inc_value = EvaluableNode::ToNumber(variable_value_node); + value_destination_node.reference->SetType(ENT_NUMBER, enm); + value_destination_node->SetNumberValue(cur_value + inc_value); + } + else if(value_destination_node->IsAssociativeArray()) + { + if(EvaluableNode::IsAssociativeArray(variable_value_node)) + { + value_destination_node->ReserveMappedChildNodes(value_destination_node->GetMappedChildNodesReference().size() + + variable_value_node->GetMappedChildNodes().size()); + value_destination_node->AppendMappedChildNodes(variable_value_node->GetMappedChildNodes()); + } + else if(variable_value_node != nullptr) //treat ordered pairs as new entries as long as not nullptr + { + value_destination_node->ReserveMappedChildNodes(value_destination_node->GetMappedChildNodesReference().size() + + variable_value_node->GetOrderedChildNodes().size() / 2); + + //iterate as long as pairs exist + auto &vvn_ocn = variable_value_node->GetOrderedChildNodes(); + for(size_t i = 0; i + 1 < vvn_ocn.size(); i += 2) + { + 
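+	//ordered child nodes are treated as a flat [key, value, key, value, ...] sequence; each pair becomes one assoc entry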
StringInternPool::StringID key_sid = EvaluableNode::ToStringIDWithReference(vvn_ocn[i]); + value_destination_node->SetMappedChildNodeWithReferenceHandoff(key_sid, vvn_ocn[i + 1]); + } + } + + enm->FreeNodeIfPossible(variable_value_node); + + value_destination_node->SetNeedCycleCheck(true); + value_destination_node.unique = (value_destination_node.unique && variable_value_node.unique); + } + else if(value_destination_node->IsStringValue()) + { + std::string cur_value = EvaluableNode::ToString(value_destination_node); + std::string inc_value = EvaluableNode::ToString(variable_value_node); + value_destination_node->SetType(ENT_STRING, enm); + value_destination_node->SetStringValue(cur_value.append(inc_value)); + value_destination_node.unique = true; + } + else //add ordered child node + { + if(EvaluableNode::IsAssociativeArray(variable_value_node)) + { + //expand out into pairs + value_destination_node->ReserveOrderedChildNodes(value_destination_node->GetOrderedChildNodes().size() + + 2 * variable_value_node->GetMappedChildNodesReference().size()); + + for(auto &[cn_id, cn] : variable_value_node->GetMappedChildNodesReference()) + { + value_destination_node->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, cn_id)); + value_destination_node->AppendOrderedChildNode(cn); + } + + enm->FreeNodeIfPossible(variable_value_node); + } + else if(EvaluableNode::IsOrderedArray(variable_value_node)) + { + value_destination_node->ReserveOrderedChildNodes(value_destination_node->GetOrderedChildNodes().size() + + variable_value_node->GetOrderedChildNodesReference().size()); + value_destination_node->AppendOrderedChildNodes(variable_value_node->GetOrderedChildNodesReference()); + + enm->FreeNodeIfPossible(variable_value_node); + } + else //just append one value + { + value_destination_node->AppendOrderedChildNode(variable_value_node); + } + + value_destination_node->SetNeedCycleCheck(true); + value_destination_node.unique = (value_destination_node.unique && variable_value_node.unique); + } + + return value_destination_node; + } + + //not unique, so need to make a new list + if(EvaluableNode::CanRepresentValueAsANumber(value_destination_node) && !EvaluableNode::IsNaN(value_destination_node)) + { + double cur_value = EvaluableNode::ToNumber(value_destination_node); + double inc_value = EvaluableNode::ToNumber(variable_value_node); + value_destination_node.reference = enm->AllocNode(cur_value + inc_value); + value_destination_node.unique = true; + } + else if(value_destination_node->IsAssociativeArray()) + { + EvaluableNode *new_list = enm->AllocNode(value_destination_node->GetType()); + + if(EvaluableNode::IsAssociativeArray(variable_value_node)) + { + new_list->ReserveMappedChildNodes(value_destination_node->GetMappedChildNodes().size() + + variable_value_node->GetMappedChildNodesReference().size()); + new_list->SetMappedChildNodes(value_destination_node->GetMappedChildNodes(), true); + new_list->AppendMappedChildNodes(variable_value_node->GetMappedChildNodes()); + } + else if(variable_value_node != nullptr) //treat ordered pairs as new entries as long as not nullptr + { + new_list->ReserveMappedChildNodes(value_destination_node->GetMappedChildNodes().size() + variable_value_node->GetOrderedChildNodes().size() / 2); + new_list->SetMappedChildNodes(value_destination_node->GetMappedChildNodes(), true); + //iterate as long as pairs exist + auto &vvn_ocn = variable_value_node->GetOrderedChildNodes(); + for(size_t i = 0; i + 1 < vvn_ocn.size(); i += 2) + { + StringInternPool::StringID key_sid = 
EvaluableNode::ToStringIDWithReference(vvn_ocn[i]); + new_list->SetMappedChildNodeWithReferenceHandoff(key_sid, vvn_ocn[i + 1]); + } + } + + enm->FreeNodeIfPossible(variable_value_node); + + value_destination_node.reference = new_list; + value_destination_node->SetNeedCycleCheck(true); + value_destination_node.unique = (value_destination_node.unique && variable_value_node.unique); + } + else if(value_destination_node->IsStringValue()) + { + std::string cur_value = EvaluableNode::ToString(value_destination_node); + std::string inc_value = EvaluableNode::ToString(variable_value_node); + value_destination_node.reference = enm->AllocNode(ENT_STRING, cur_value.append(inc_value)); + value_destination_node.unique = true; + } + else //add ordered child node + { + EvaluableNode *new_list = enm->AllocNode(ENT_LIST); + if(EvaluableNode::IsAssociativeArray(variable_value_node)) + { + //expand out into pairs + new_list->ReserveOrderedChildNodes(value_destination_node->GetOrderedChildNodes().size() + 2 * variable_value_node->GetMappedChildNodes().size()); + new_list->AppendOrderedChildNodes(value_destination_node->GetOrderedChildNodes()); + for(auto &[cn_id, cn] : variable_value_node->GetMappedChildNodes()) + { + new_list->AppendOrderedChildNode(enm->AllocNode(ENT_STRING, cn_id)); + new_list->AppendOrderedChildNode(cn); + } + + enm->FreeNodeIfPossible(variable_value_node); + } + else if(EvaluableNode::IsOrderedArray(variable_value_node)) + { + new_list->ReserveOrderedChildNodes(value_destination_node->GetOrderedChildNodes().size() + variable_value_node->GetOrderedChildNodes().size()); + new_list->AppendOrderedChildNodes(value_destination_node->GetOrderedChildNodes()); + new_list->AppendOrderedChildNodes(variable_value_node->GetOrderedChildNodes()); + + enm->FreeNodeIfPossible(variable_value_node); + } + else //just append one value + { + new_list->ReserveOrderedChildNodes(value_destination_node->GetOrderedChildNodes().size() + 1); + new_list->AppendOrderedChildNodes(value_destination_node->GetOrderedChildNodes()); + new_list->AppendOrderedChildNode(variable_value_node); + } + + value_destination_node.reference = new_list; + value_destination_node->SetNeedCycleCheck(true); + value_destination_node.unique = (value_destination_node.unique && variable_value_node.unique); + } + + return value_destination_node; +} diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.h new file mode 100644 index 00000000..7cac8848 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeFunctions.h @@ -0,0 +1,174 @@ +#pragma once + +//project headers: +#include "Entity.h" +#include "EvaluableNode.h" + +//system headers: +#include +#include +#include + +//forward declarations: +class Interpreter; + +//used for any operation that must sort different values - for passing in a lambda to run on every operation +class CustomEvaluableNodeComparator +{ +public: + constexpr CustomEvaluableNodeComparator(Interpreter *_interpreter, EvaluableNode *_function, EvaluableNode *target_list) + : interpreter(_interpreter), function(_function), targetList(target_list) + { } + + bool operator()(EvaluableNode *a, EvaluableNode *b); + +private: + Interpreter *interpreter; + EvaluableNode *function; + EvaluableNode *targetList; +}; + +//sorts list based on the specified CustomEvaluableNodeComparator using a stable merge sort +// does not require weak ordering from cenc +// merge sort is the preferrable sort due to the lack of weak ordering and bottleneck being interpretation 
+ //returns a newly sorted list +std::vector CustomEvaluableNodeOrderedChildNodesSort(std::vector &list, CustomEvaluableNodeComparator &cenc); + +//Returns negative if a is less than b, +// positive if greater, or 0 if equal or not numerically comparable +int StringNaturalCompare(const std::string &a, const std::string &b); + +inline int StringNaturalCompare(const StringInternPool::StringID a, const StringInternPool::StringID b) +{ + return StringNaturalCompare(string_intern_pool.GetStringFromID(a), string_intern_pool.GetStringFromID(b)); +} + +inline bool StringNaturalCompareSort(const std::string &a, const std::string &b) +{ + int comp = StringNaturalCompare(a, b); + return comp < 0; +} + +inline bool StringIDNaturalCompareSort(const StringInternPool::StringID a, const StringInternPool::StringID b) +{ + int comp = StringNaturalCompare(string_intern_pool.GetStringFromID(a), string_intern_pool.GetStringFromID(b)); + return comp < 0; +} + +inline bool StringNaturalCompareSortReverse(const std::string &a, const std::string &b) +{ + int comp = StringNaturalCompare(a, b); + return comp > 0; +} + +inline bool StringIDNaturalCompareSortReverse(const StringInternPool::StringID a, const StringInternPool::StringID b) +{ + int comp = StringNaturalCompare(a, b); + return comp > 0; +} + +//Starts at the container specified and traverses the id list specified, finding the relative Entity from container +// if id_path is nullptr, then it will set relative_entity to the container itself, leaving relative_entity_parent to nullptr +// if id_path is invalid or container is nullptr, then it will set both relative_entity and relative_entity_parent to nullptr +// if id_path is any form of a list, then it will treat the ids as a sequence of subcontainers +// otherwise the id_path is transformed to a string and used as an id +//sets relative_entity_parent to the base entity found, sets id to the value of the id relative to the base, and relative_entity to the entity being pointed to +// if the path exists (as in a destination of where to put an entity) but the target entity does not, then relative_entity_parent may be a valid reference and relative_entity may be nullptr +//Note that id is allocated in the string_intern_pool, and the caller is responsible for freeing the allocation +void TraverseToEntityViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path, Entity *&relative_entity_parent, StringInternRef &id, Entity *&relative_entity); + +//Starts at the container specified and traverses the id list specified, finding the relative Entity from container +// if id_path does not exist or is invalid then returns nullptr +Entity *TraverseToExistingEntityViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path); + +//Like TraverseToEntityViaEvaluableNodeIDPath, except ensures that the final destination does not exist (or if it does, it will place it within the entity specified) +//Note that destination_id is allocated in the string_intern_pool, and the caller is responsible for freeing the allocation +void TraverseEntityToNewDestinationViaEvaluableNodeIDPath(Entity *container, EvaluableNode *id_path, Entity *&destination_entity_parent, StringInternRef &destination_id); + +//constructs a list of IDs that will traverse from a to b, assuming that b is contained somewhere within a +EvaluableNode *GetTraversalIDPathListFromAToB(EvaluableNodeManager *enm, Entity *a, Entity *b); + +//similar to Parser::GetCodeForPathFromAToB, but instead returns a list of how to traverse each node, such as which index or
which key to use to traverse +// returns nullptr if no path exists +EvaluableNode *GetTraversalPathListFromAToB(EvaluableNodeManager *enm, EvaluableNode::ReferenceAssocType &node_parents, EvaluableNode *a, EvaluableNode *b); + +//Starts at source and traverses based on the indexes in the index_path, assuming that index_path is a list of ordered nodes each +// of which specifies the index (number or string) to traverse +//index_path_nodes should be a pointer to an array of EvaluableNode *s of length num_index_path_nodes +//if enm is non-null, then it will enlarge list sizes, add assoc keys, and create entirely new nodes (of default types) if the target does not exist, up to a maximum of max_num_nodes +// (unless max_num_nodes is 0, in which case it is ignored) +// if it is null, then it will only return existing nodes +EvaluableNode **GetRelativeEvaluableNodeFromTraversalPathList(EvaluableNode **source, EvaluableNode **index_path_nodes, size_t num_index_path_nodes, EvaluableNodeManager *enm, size_t max_num_nodes); + +//accumulates variable_value_node into value_destination_node and returns the result +// will free the top node of variable_value_node if possible; e.g., if appending a list to a list, will free the second list if possible +EvaluableNodeReference AccumulateEvaluableNodeIntoEvaluableNode(EvaluableNodeReference value_destination_node, EvaluableNodeReference variable_value_node, EvaluableNodeManager *enm); + +//using enm, builds an assoc from id_value_container using get_string_id and get_number to get the id and number of each entry +//note that get_string_id will be called twice and will be called under locks in multithreading, so it should be a very simple function +template +inline EvaluableNodeReference CreateAssocOfNumbersFromIteratorAndFunctions(IDValueContainer &id_value_container, + IDFunction get_string_id, ValueFunction get_number, EvaluableNodeManager *enm) +{ + EvaluableNode *assoc = enm->AllocNode(ENT_ASSOC); + assoc->ReserveMappedChildNodes(id_value_container.size()); + + string_intern_pool.CreateStringReferences(id_value_container, get_string_id); + + for(auto &id_value_iterator : id_value_container) + { + StringInternPool::StringID entity_sid = get_string_id(id_value_iterator); + assoc->SetMappedChildNodeWithReferenceHandoff(entity_sid, enm->AllocNode(get_number(id_value_iterator))); + } + + return EvaluableNodeReference(assoc, true); +} + +//using enm, builds a list of numbers from value_container using get_number to get the number for each entry +template +inline EvaluableNodeReference CreateListOfNumbersFromIteratorAndFunction(ValueContainer &value_container, + EvaluableNodeManager *enm, GetNumberFunction get_number) +{ + EvaluableNode *list = enm->AllocListNodeWithOrderedChildNodes(ENT_NUMBER, value_container.size()); + auto &ocn = list->GetOrderedChildNodes(); + + size_t index = 0; + for(auto value_element : value_container) + ocn[index++]->SetNumberValue(get_number(value_element)); + + return EvaluableNodeReference(list, true); +} + +//using enm, builds a list of string ids from string_container using get_string_id to get the id of each entry +//note that get_string_id will be called twice and will be called under locks in multithreading, so it should be a very simple function +template +inline EvaluableNodeReference CreateListOfStringsIdsFromIteratorAndFunction(StringContainer &string_container, + EvaluableNodeManager *enm, GetStringFunction get_string_id) +{ + EvaluableNode *list =
enm->AllocListNodeWithOrderedChildNodes(ENT_STRING, string_container.size()); + auto &ocn = list->GetOrderedChildNodes(); + + string_intern_pool.CreateStringReferences(string_container, get_string_id); + + size_t index = 0; + for(auto string_element : string_container) + ocn[index++]->SetStringIDWithReferenceHandoff(get_string_id(string_element)); + + return EvaluableNodeReference(list, true); +} + +//using enm, builds a list from string_container to id_value_iterator_end using get_string_id and get_number to get the id and number of each entry +//note that get_string_id will be called twice and will be called under locks in multithreading, so it should be a very simple function +template +inline EvaluableNodeReference CreateListOfStringsFromIteratorAndFunction(StringContainer &string_container, + EvaluableNodeManager *enm, GetStringFunction get_string) +{ + EvaluableNode *list = enm->AllocListNodeWithOrderedChildNodes(ENT_STRING, string_container.size()); + auto &ocn = list->GetOrderedChildNodes(); + + size_t index = 0; + for(auto string_element : string_container) + ocn[index++]->SetStringValue(get_string(string_element)); + + return EvaluableNodeReference(list, true); +} diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp new file mode 100644 index 00000000..4f64e170 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.cpp @@ -0,0 +1,2211 @@ +//project headers: +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNode.h" +#include "EvaluableNodeTreeFunctions.h" +#include "FastMath.h" +#include "Interpreter.h" +#include "Merger.h" + +//system headers: +#include + +EvaluableNodeTreeManipulation::NodesMixMethod::NodesMixMethod(RandomStream random_stream, EvaluableNodeManager *_enm, + double fraction_a, double fraction_b, double similar_mix_chance) : NodesMergeMethod(_enm, true, false) +{ + randomStream = random_stream; + + //clamp each to the appropriate range, 0 to 1 for fractions, -1 to 1 for similarMixChance + if(FastIsNaN(fraction_a)) + fractionA = 0.0; + else + fractionA = std::min(1.0, std::max(0.0, fraction_a)); + + if(FastIsNaN(fraction_b)) + fractionB = 0.0; + else + fractionB = std::min(1.0, std::max(0.0, fraction_b)); + + fractionAOrB = fractionA + fractionB - fractionA * fractionB; + fractionAInsteadOfB = fractionA / (fractionA + fractionB); + + //similarMixChance can go from -1 to 1 + if(FastIsNaN(similar_mix_chance)) + similarMixChance = 0.0; + else + similarMixChance = std::min(1.0, std::max(-1.0, similar_mix_chance)); +} + +//returns a mix of a and b based on their fractions +inline double MixNumberValues(double a, double b, double fraction_a, double fraction_b) +{ + //quick exit for when they match + if(EqualIncludingNaN(a, b)) + return a; + + //handle nans + if(FastIsNaN(a)) + { + if(fraction_a > 0) + return std::numeric_limits::quiet_NaN(); + else + return b; + } + + if(FastIsNaN(b)) + { + if(fraction_b > 0) + return std::numeric_limits::quiet_NaN(); + else + return a; + } + + //normalize fractions + fraction_a = fraction_a / (fraction_a + fraction_b); + return a * fraction_a + b * (1 - fraction_a); +} + +//returns a mix of a and b based on their fractions +inline StringInternPool::StringID MixStringValues(StringInternPool::StringID a, StringInternPool::StringID b, + RandomStream random_stream, double fraction_a, double fraction_b) +{ + //quick exit for when they match + if(a == b) + return string_intern_pool.CreateStringReference(a); + + if(a == 
StringInternPool::NOT_A_STRING_ID) + return string_intern_pool.CreateStringReference(b); + + if(b == StringInternPool::NOT_A_STRING_ID) + return string_intern_pool.CreateStringReference(a); + + const auto &a_str = string_intern_pool.GetStringFromID(a); + const auto &b_str = string_intern_pool.GetStringFromID(b); + std::string result = EvaluableNodeTreeManipulation::MixStrings(a_str, b_str, + random_stream, fraction_a, fraction_b); + + return string_intern_pool.CreateStringReference(result); +} + +bool EvaluableNodeTreeManipulation::NodesMergeMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b) +{ + size_t num_common_labels; + size_t num_unique_labels; + EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels); + + auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b, true); + + return (commonality == 1.0 && num_unique_labels == 0); +} + +EvaluableNode *EvaluableNodeTreeManipulation::NodesMixMethod::MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge) +{ + //early out + if(a == nullptr && b == nullptr) + return nullptr; + + if(AreMergeable(a, b) || must_merge) + { + EvaluableNode *merged = MergeTrees(this, a, b); + + //if the original and merged, check to see if mergeable of same type, and if so, interpolate + if(merged != nullptr && a != nullptr && b != nullptr) + { + if(merged->IsNativelyNumeric() && a->IsNativelyNumeric() && b->IsNativelyNumeric()) + { + double a_value = a->GetNumberValue(); + double b_value = b->GetNumberValue(); + double mixed_value = MixNumberValues(a_value, b_value, fractionA, fractionB); + merged->SetNumberValue(mixed_value); + } + else if(merged->GetType() == ENT_STRING && a->GetType() == ENT_STRING && b->GetType() == ENT_STRING) + { + auto a_value = a->GetStringID(); + auto b_value = b->GetStringID(); + auto mixed_value = MixStringValues(a_value, b_value, + randomStream.CreateOtherStreamViaRand(), fractionA, fractionB); + merged->SetStringIDWithReferenceHandoff(mixed_value); + } + } + + return merged; + } + + if(KeepNonMergeableAInsteadOfB()) + return MergeTrees(this, a, nullptr); + else + return MergeTrees(this, nullptr, b); +} + +bool EvaluableNodeTreeManipulation::NodesMixMethod::AreMergeable(EvaluableNode *a, EvaluableNode *b) +{ + size_t num_common_labels; + size_t num_unique_labels; + EvaluableNode::GetNodeCommonAndUniqueLabelCounts(a, b, num_common_labels, num_unique_labels); + + auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(a, b); + + //if the immediate nodes are in fact a match, then just merge them + if(commonality == 1.0 && num_unique_labels == 0) + return true; + + //assess overall commonality between value commonality and label commonality + double overall_commonality = (commonality + num_common_labels) + / (1 + num_common_labels + num_unique_labels); + + double prob_of_match = overall_commonality; + if(commonality > 0) + { + if(similarMixChance > 0.0) + { + //probability of match is commonality OR similarMixChance + // however, these are not mutually exclusive, so need to remove the conjunction of the + // probability of both to prevent double-counting + prob_of_match = overall_commonality + similarMixChance - overall_commonality * similarMixChance; + } + else if(similarMixChance < 0) + { + //probability of match is commonality AND not (negative similarMixChance) + // because similarMixChance is negative, adding to 1 is the same as NOT + prob_of_match = overall_commonality * (1.0 + similarMixChance); + } + //else 0.0 or NaN, just leave as overall_commonality + } + + 
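+	//for example, with overall_commonality of 0.5: a similarMixChance of 0.2 gives 0.5 + 0.2 - 0.5 * 0.2 = 0.6, while -0.2 gives 0.5 * (1.0 - 0.2) = 0.4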
return randomStream.Rand() < prob_of_match; +} + +MergeMetricResults EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeMetric(std::string *a, std::string *b) +{ + if(a == b || (a != nullptr && b != nullptr && *a == *b)) + return MergeMetricResults(1.0, a, b); + else + return MergeMetricResults(0.0, a, b); +} + +std::string *EvaluableNodeTreeManipulation::StringSequenceMergeMetric::MergeValues(std::string *a, std::string *b, bool must_merge) +{ + if(keepAllOfBoth) + { + if(a != nullptr) + return a; + return b; + } + + //pick one, so select a + return a; +} + +EvaluableNodeTreeManipulation::StringsMixMethodUtf8::StringsMixMethodUtf8(RandomStream random_stream, + double fraction_a, double fraction_b) +{ + randomStream = random_stream; + + //clamp each to the appropriate range of [0,1] + if(FastIsNaN(fraction_a)) + fractionA = 0.0; + else + fractionA = std::min(1.0, std::max(0.0, fraction_a)); + + if(FastIsNaN(fraction_b)) + fractionB = 0.0; + else + fractionB = std::min(1.0, std::max(0.0, fraction_b)); + + fractionAOrB = fractionA + fractionB - fractionA * fractionB; + fractionAInsteadOfB = fractionA / (fractionA + fractionB); +} + +EvaluableNode *EvaluableNodeTreeManipulation::IntersectTrees(EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2) +{ + NodesMergeMethod mm(enm, false, true); + return mm.MergeValues(tree1, tree2); +} + +EvaluableNode *EvaluableNodeTreeManipulation::UnionTrees(EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2) +{ + NodesMergeMethod mm(enm, true, true); + return mm.MergeValues(tree1, tree2); +} + +EvaluableNode *EvaluableNodeTreeManipulation::MixTrees(RandomStream random_stream, EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2, + double fraction_a, double fraction_b, double similar_mix_chance) +{ + NodesMixMethod mm(random_stream, enm, fraction_a, fraction_b, similar_mix_chance); + return mm.MergeValues(tree1, tree2); +} + +EvaluableNode *EvaluableNodeTreeManipulation::MixTreesByCommonLabels(Interpreter *interpreter, EvaluableNodeManager *enm, + EvaluableNodeReference tree1, EvaluableNodeReference tree2, RandomStream &rs, double fraction_a, double fraction_b) +{ + //can't merge anything into an empty tree + if(tree1 == nullptr) + return nullptr; + + EvaluableNodeReference result_tree = enm->DeepAllocCopy(tree1); + + //if nothing to merge into the first tree, then just return unmodified copy + if(tree2 == nullptr) + return result_tree; + + auto index1 = RetrieveLabelIndexesFromTree(tree1.reference); + auto index2 = RetrieveLabelIndexesFromTree(tree2.reference); + + //normalize fraction to be less than 1 + double total_fraction = fraction_a + fraction_b; + if(total_fraction > 1.0) + { + fraction_a /= total_fraction; + fraction_b /= total_fraction; + } + + //get only labels that are in both trees + + //get list of labels from both + CompactHashSet common_labels(index1.size() + index2.size()); + for(auto &[node_id, _] : index1) + common_labels.insert(node_id); + for(auto &[node_id, _] : index2) + common_labels.insert(node_id); + + //get number of labels from each + std::vector all_labels(begin(common_labels), end(common_labels)); + size_t num_from_2 = static_cast(fraction_b * all_labels.size()); + size_t num_to_remove = static_cast((1.0 - fraction_a - fraction_b) * all_labels.size()); + + //remove labels from the first that are not used + for(size_t i = 0; i < num_to_remove; i++) + { + //take a random string + size_t index_to_remove = rs.RandSize(all_labels.size()); + StringInternPool::StringID 
label_id = all_labels[index_to_remove]; + all_labels.erase(begin(all_labels) + index_to_remove); + + //remove its label. Reuse enm for temporary since used it to create the new tree + ReplaceLabelInTree(result_tree.reference, label_id, nullptr); + } + + //replace labels from the second + for(size_t i = 0; i < num_from_2; i++) + { + //take a random string + size_t index_to_remove = rs.RandSize(all_labels.size()); + StringInternPool::StringID label_id = all_labels[index_to_remove]; + all_labels.erase(begin(all_labels) + index_to_remove); + + //replace with something from the other tree. Reuse enm for temporary since used it to create the new tree + const auto replacement_index = index2.find(label_id); + if(replacement_index != end(index2)) + { + EvaluableNode *replacement = enm->DeepAllocCopy(replacement_index->second); + ReplaceLabelInTree(result_tree.reference, label_id, replacement); + } + } + + return result_tree; +} + +std::string EvaluableNodeTreeManipulation::MixStrings(const std::string &a, const std::string &b, + RandomStream random_stream, double fraction_a, double fraction_b) +{ + StringManipulation::ExplodeUTF8Characters(a, aCharsBuffer); + StringManipulation::ExplodeUTF8Characters(b, bCharsBuffer); + + StringsMixMethodUtf8 smm(random_stream, fraction_a, fraction_b); + auto destCharsBuffer = smm.MergeSequences(aCharsBuffer, bCharsBuffer); + + std::string result = StringManipulation::ConcatUTF8Characters(destCharsBuffer); + return result; +} + +bool EvaluableNodeTreeManipulation::DoesTreeContainLabels(EvaluableNode *en) +{ + if(en == nullptr) + return false; + + if(en->GetNumChildNodes() == 0) + return (en->GetNumLabels() > 0); + + if(!en->GetNeedCycleCheck()) + return NonCycleDoesTreeContainLabels(en); + + EvaluableNode::ReferenceSetType checked; + return DoesTreeContainLabels(en, checked); +} + +std::pair EvaluableNodeTreeManipulation::RetrieveLabelIndexesFromTreeAndNormalize(EvaluableNode *en) +{ + Entity::LabelsAssocType index; + EvaluableNode::ReferenceSetType checked; + + //can check faster if don't need to check for cycles + bool en_cycle_free = (en == nullptr || !en->GetNeedCycleCheck()); + bool label_collision = CollectLabelIndexesFromNormalTree(en, index, en_cycle_free ? nullptr : &checked); + + //if no collision, return + if(!label_collision) + return std::make_pair(index, false); + + //keep replacing until don't need to replace anymore + EvaluableNode *to_replace = nullptr; + while(true) + { + index.clear(); + checked.clear(); + bool replacement = CollectLabelIndexesFromTreeAndMakeLabelNormalizationPass(en, index, checked, to_replace); + + if(!replacement) + break; + } + + //things have been replaced, so anything might need to be updated + EvaluableNodeManager::UpdateFlagsForNodeTree(en, checked); + + return std::make_pair(index, true); +} + +void EvaluableNodeTreeManipulation::ReplaceLabelInTreeRecurse(EvaluableNode *&tree, StringInternPool::StringID label_id, + EvaluableNode *replacement, EvaluableNode::ReferenceSetType &checked) +{ + //validate input + if(tree == nullptr || label_id == StringInternPool::NOT_A_STRING_ID) + return; + + //try to insert. 
if fails, then it has already been inserted, so ignore + if(checked.insert(tree).second == false) + return; + + size_t num_node_labels = tree->GetNumLabels(); + if(num_node_labels > 0) + { + //see if this node either has multiple labels or is a match; if so, need to replace it + if(num_node_labels > 1 || tree->GetLabelStringId(0) == label_id) + { + //get the labels in case we'll need to merge them + const auto &tree_node_label_sids = tree->GetLabelsStringIds(); + if(std::find(begin(tree_node_label_sids), end(tree_node_label_sids), label_id) != end(tree_node_label_sids)) + { + EvaluableNode *result = replacement; + if(result != nullptr) + { + //copy over relevant labels to the new node + std::vector new_labels; + if(replacement != nullptr) + new_labels = replacement->GetLabelsStringIds(); + + result->SetLabelsStringIds(UnionStringIDVectors(tree_node_label_sids, new_labels)); + } + + //don't free anything, because it could be referred to by other locations + tree = result; + return; + } + } + } + + //update all ordered child nodes + for(auto &cn : tree->GetOrderedChildNodes()) + ReplaceLabelInTreeRecurse(cn, label_id, replacement, checked); + + //update all mapped child nodes + for(auto &[_, cn] : tree->GetMappedChildNodes()) + ReplaceLabelInTreeRecurse(cn, label_id, replacement, checked); +} + +EvaluableNode *EvaluableNodeTreeManipulation::CreateGeneralizedNode(NodesMergeMethod *mm, EvaluableNode *n1, EvaluableNode *n2) +{ + if(n1 == nullptr && n2 == nullptr) + return nullptr; + + EvaluableNodeManager *enm = mm->enm; + + //if want to keep all of both and only one exists, copy it + if(mm->KeepSomeNonMergeableValues()) + { + if(n1 != nullptr && n2 == nullptr) + return enm->AllocNode(n1); + else if(n1 == nullptr && n2 != nullptr) + return enm->AllocNode(n2); + } + + auto [node, commonality] = EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues(n1, n2); + + //if both are nullptr, nothing more to do + if(node == nullptr) + return nullptr; + + //see if need exact commonality + if(mm->RequireExactMatches() && commonality != 1.0) + return nullptr; + + //make a new copy of it + auto common_type = node->GetType(); + EvaluableNode *n = enm->AllocNode(common_type); + + //if immediate, copy value + if(DoesEvaluableNodeTypeUseNumberData(common_type)) + n->SetNumberValue(node->GetNumberValue()); + else if(DoesEvaluableNodeTypeUseStringData(common_type)) + n->SetStringID(node->GetStringID()); + + //merge labels + size_t n1_num_labels = n1->GetNumLabels(); + size_t n2_num_labels = n2->GetNumLabels(); + if(mm->KeepSomeNonMergeableValues()) + { + if(n1_num_labels > 0 || n2_num_labels > 0) + n->SetLabelsStringIds(UnionStringIDVectors(n1->GetLabelsStringIds(), n2->GetLabelsStringIds())); + } + else + { + if(n1_num_labels > 0 && n2_num_labels > 0) + n->SetLabelsStringIds(IntersectStringIDVectors(n1->GetLabelsStringIds(), n2->GetLabelsStringIds())); + } + + //merge comments if they exist + if(n1->GetCommentsStringId() != StringInternPool::NOT_A_STRING_ID || n2->GetCommentsStringId() != StringInternPool::NOT_A_STRING_ID) + { + //convert from vectors of strings to vectors of pointers to strings so can merge on them + auto n1_comment_strings = n1->GetCommentsSeparateLines(); + std::vector n1_comment_string_ptrs(n1_comment_strings.size()); + for(size_t i = 0; i < n1_comment_strings.size(); i++) + n1_comment_string_ptrs[i] = &n1_comment_strings[i]; + + auto n2_comment_strings = n2->GetCommentsSeparateLines(); + std::vector n2_comment_string_ptrs(n2_comment_strings.size()); + for(size_t i = 0; i < 
n2_comment_strings.size(); i++) + n2_comment_string_ptrs[i] = &n2_comment_strings[i]; + + StringSequenceMergeMetric ssmm(mm->KeepSomeNonMergeableValues()); + auto merged_comment_lines = ssmm.MergeSequences(n1_comment_string_ptrs, n2_comment_string_ptrs); + + //append back to one string + std::string merged_comments; + for(auto &line : merged_comment_lines) + { + //if already have comments, append a newline + if(merged_comments.size() > 0) + merged_comments.append("\r\n"); + merged_comments.append(*line); + } + + n->SetComments(merged_comments); + } + + return n; +} + +std::vector EvaluableNodeTreeManipulation::UnionStringIDVectors(const std::vector &label_list_a, const std::vector &label_list_b) +{ + //quick shortcuts in case either list is empty + if(label_list_a.size() == 0) + return std::vector(label_list_b); + if(label_list_b.size() == 0) + return std::vector(label_list_a); + + //create list of unique labels included in either + auto all_labels = CompactHashSet(label_list_a.size() + label_list_b.size()); + all_labels.insert(begin(label_list_a), end(label_list_a)); + all_labels.insert(begin(label_list_b), end(label_list_b)); + return std::vector(begin(all_labels), end(all_labels)); +} + +std::vector EvaluableNodeTreeManipulation::IntersectStringIDVectors(const std::vector &label_list_a, const std::vector &label_list_b) +{ + //quick shortcut in case either list is empty + if(label_list_a.size() == 0 || label_list_b.size() == 0) + return std::vector(); + + std::vector labels_in_1(begin(label_list_a), end(label_list_a)); + std::vector labels_in_2(begin(label_list_b), end(label_list_b)); + std::vector common_labels(label_list_a.size() + label_list_b.size()); //hold enough in case all are unique + + //sort both of the lists as required before passing into set_intersection + std::sort(begin(labels_in_1), end(labels_in_1)); + std::sort(begin(labels_in_2), end(labels_in_2)); + + //create a new clean set and insert the set intersection + auto common_end = std::set_intersection(begin(labels_in_1), end(labels_in_1), begin(labels_in_2), end(labels_in_2), begin(common_labels)); + + //get rid of any unused entries + common_labels.resize(common_end - begin(common_labels)); + return common_labels; +} + +EvaluableNode *EvaluableNodeTreeManipulation::MergeTrees(NodesMergeMethod *mm, EvaluableNode *tree1, EvaluableNode *tree2) +{ + //shortcut for merging empty trees + if(tree1 == nullptr && tree2 == nullptr) + return nullptr; + + //if it's already been merged, then return the previous merged version + auto &references = mm->GetReferences(); + + auto find_tree1 = references.find(tree1); + if(find_tree1 != end(references)) + return find_tree1->second; + + auto find_tree2 = references.find(tree2); + if(find_tree2 != end(references)) + return find_tree2->second; + + //find best node to combine from each tree + auto best_shared_nodes_match = mm->MergeMetric(tree1, tree2); + //if not keeping any nonmergeable values, then just cut out anything that isn't common + if(!mm->KeepSomeNonMergeableValues()) + { + tree1 = best_shared_nodes_match.elementA; + tree2 = best_shared_nodes_match.elementB; + } + else if( (tree1 != best_shared_nodes_match.elementA && mm->KeepNonMergeableA()) + || (tree2 != best_shared_nodes_match.elementB && mm->KeepNonMergeableB()) ) + { + //might keep one or the other, so make a merge which will be kept in references + //if the reference is hit, it will be used + //this result may be not used some of the time due to being cut out, but most of the time this will be efficient + MergeTrees(mm, 
best_shared_nodes_match.elementA, best_shared_nodes_match.elementB); + + //whichever one doesn't match, set that one to null and merge on the one that did + if(tree1 != best_shared_nodes_match.elementA) + tree2 = nullptr; + else if(tree2 != best_shared_nodes_match.elementB) + tree1 = nullptr; + } + + //get new generalized node of all + EvaluableNode *generalized_node = EvaluableNodeTreeManipulation::CreateGeneralizedNode(mm, tree1, tree2); + + //if nothing, then don't keep processing + if(generalized_node == nullptr) + return nullptr; + + //put it in the references list for both trees + if(tree1 != nullptr) + references[tree1] = generalized_node; + if(tree2 != nullptr) + references[tree2] = generalized_node; + + //if the generalized_node is assoc and at least one is an assoc, + // make sure are initialized (or initialize) and merge + if( generalized_node->IsAssociativeArray() && + ( (tree1 != nullptr && tree1->IsAssociativeArray()) + || (tree2 != nullptr && tree2->IsAssociativeArray())) ) + { + //get or convert the nodes to an assoc for tree1 + EvaluableNode::AssocType tree1_conversion_assoc; + auto *tree1_mapped_childs = &tree1_conversion_assoc; + if(tree1 != nullptr && tree1->IsAssociativeArray()) + tree1_mapped_childs = &tree1->GetMappedChildNodesReference(); + + //get or convert the nodes to an assoc for tree2 + EvaluableNode::AssocType tree2_conversion_assoc; + auto *tree2_mapped_childs = &tree2_conversion_assoc; + if(tree2 != nullptr && tree2->IsAssociativeArray()) + tree2_mapped_childs = &tree2->GetMappedChildNodesReference(); + + EvaluableNode::AssocType merged = mm->MergeMaps(*tree1_mapped_childs, *tree2_mapped_childs); + //hand off merged allocation into the generalized_node (hence the false parameter) + generalized_node->SetMappedChildNodes(merged, false); + + return generalized_node; + } + + std::vector empty_vector; + + auto *tree1_ordered_childs = &empty_vector; + if(tree1 != nullptr && tree1->IsOrderedArray()) + tree1_ordered_childs = &tree1->GetOrderedChildNodesReference(); + + auto *tree2_ordered_childs = &empty_vector; + if(tree2 != nullptr && tree2->IsOrderedArray()) + tree2_ordered_childs = &tree2->GetOrderedChildNodesReference(); + + //see if both trees have ordered child nodes + if(tree1_ordered_childs->size() > 0 || tree2_ordered_childs->size() > 0) + { + auto iocnt = GetInstructionOrderedChildNodeType(generalized_node->GetType()); + switch(iocnt) + { + case OCNT_UNORDERED: + generalized_node->SetOrderedChildNodes(mm->MergeUnorderedSets(*tree1_ordered_childs, *tree2_ordered_childs)); + break; + + case OCNT_ORDERED: + generalized_node->SetOrderedChildNodes(mm->MergeSequences(*tree1_ordered_childs, *tree2_ordered_childs)); + break; + + case OCNT_ONE_POSITION_THEN_ORDERED: + case OCNT_ONE_POSITION_THEN_PAIRED: + { + //start from a clean slate + generalized_node->ClearOrderedChildNodes(); + + //make arrays of just the first node + std::vector a1; + std::vector a2; + if(tree1_ordered_childs->size() > 0) + a1.emplace_back((*tree1_ordered_childs)[0]); + if(tree2_ordered_childs->size() > 0) + a2.emplace_back((*tree2_ordered_childs)[0]); + + //put on the first position + auto merged = mm->MergePositions(a1, a2); + generalized_node->GetOrderedChildNodes().insert(end(generalized_node->GetOrderedChildNodes()), begin(merged), end(merged)); + + //make new arrays without first position + a1.clear(); + a2.clear(); + if(tree1_ordered_childs->size() > 0) + a1.insert(begin(a1), begin(*tree1_ordered_childs), end(*tree1_ordered_childs)); + if(tree2_ordered_childs->size() > 0) + 
a2.insert(begin(a2), begin(*tree2_ordered_childs), end(*tree2_ordered_childs)); + if(a1.size() > 0) + a1.erase(begin(a1)); + if(a2.size() > 0) + a2.erase(begin(a2)); + + //append the rest + if(iocnt == OCNT_ONE_POSITION_THEN_ORDERED) + merged = mm->MergeSequences(a1, a2); + else if(iocnt == OCNT_ONE_POSITION_THEN_PAIRED) + merged = mm->MergeUnorderedSetsOfPairs(a1, a2); + generalized_node->GetOrderedChildNodes().insert(end(generalized_node->GetOrderedChildNodes()), begin(merged), end(merged)); + break; + } + + case OCNT_PAIRED: + generalized_node->SetOrderedChildNodes(mm->MergeUnorderedSetsOfPairs(*tree1_ordered_childs, *tree2_ordered_childs)); + break; + + case OCNT_POSITION: + generalized_node->SetOrderedChildNodes(mm->MergePositions(*tree1_ordered_childs, *tree2_ordered_childs)); + break; + + default: + break; + } + } + + return generalized_node; +} + +EvaluableNode *EvaluableNodeTreeManipulation::MutateTree(Interpreter *interpreter, EvaluableNodeManager *enm, + EvaluableNode *tree, double mutation_rate, + CompactHashMap *mutation_weights, + CompactHashMap *evaluable_node_weights) +{ + std::vector strings; + EvaluableNode::ReferenceSetType checked; + GetStringsFromTree(tree, strings, checked); + + MutationParameters::WeightedRandEvaluableNodeType operation_type_wrs; + if(evaluable_node_weights != nullptr && !evaluable_node_weights->empty()) + operation_type_wrs.Initialize(*evaluable_node_weights, true); + + MutationParameters::WeightedRandMutationType rand_mutation_type; + if(mutation_weights != nullptr && !mutation_weights->empty()) + rand_mutation_type.Initialize(*mutation_weights, true); + + MutationParameters mp(interpreter, enm, mutation_rate, &strings, operation_type_wrs.IsInitialized() ? &operation_type_wrs : &evaluableNodeTypeRandomStream, rand_mutation_type.IsInitialized() ? 
&rand_mutation_type : &mutationOperationTypeRandomStream); + EvaluableNode *ret = MutateTree(mp, tree); + + return ret; +} + +void EvaluableNodeTreeManipulation::ReplaceStringsInTree(EvaluableNode *tree, CompactHashMap &to_replace) +{ + EvaluableNode::ReferenceSetType checked; + ReplaceStringsInTree(tree, to_replace, checked); +} + +EvaluableNodeType EvaluableNodeTreeManipulation::GetRandomEvaluableNodeType(RandomStream *rs) +{ + if(rs == nullptr) + return ENT_NOT_A_BUILT_IN_TYPE; + + return evaluableNodeTypeRandomStream.WeightedDiscreteRand(*rs); +} + +MergeMetricResults EvaluableNodeTreeManipulation::NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2, + MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked) +{ + if(tree1 == nullptr && tree2 == nullptr) + return MergeMetricResults(1.0, tree1, tree2, false, true); + + //if one is null and the other isn't, then stop + if( (tree1 == nullptr && tree2 != nullptr) || (tree1 != nullptr && tree2 == nullptr) ) + return MergeMetricResults(0.0, tree1, tree2, false, false); + + //if the pair of nodes has already been computed, then just return the result + auto found = memoized.find(std::make_pair(tree1, tree2)); + if(found != end(memoized)) + return found->second; + + if(checked != nullptr) + { + //if either is already checked, then neither adds shared nodes + if(checked->find(tree1) != end(*checked) || checked->find(tree2) != end(*checked)) + return MergeMetricResults(0.0, tree1, tree2, false, true); + } + + //if the trees are the same, then just return the size + if(tree1 == tree2) + { + MergeMetricResults results(static_cast(EvaluableNode::GetDeepSize(tree1)), tree1, tree2, true, true); + memoized.emplace(std::make_pair(tree1, tree2), results); + return results; + } + + //check current top nodes + auto commonality = CommonalityBetweenNodes(tree1, tree2); + + //see if can exit early, before inserting the nodes into the checked list and then removing them + size_t tree1_ordered_nodes_size = 0; + size_t tree1_mapped_nodes_size = 0; + size_t tree2_ordered_nodes_size = 0; + size_t tree2_mapped_nodes_size = 0; + + if(tree1->IsAssociativeArray()) + tree1_mapped_nodes_size = tree1->GetMappedChildNodesReference().size(); + else if(!tree1->IsImmediate()) + tree1_ordered_nodes_size = tree1->GetOrderedChildNodesReference().size(); + + if(tree2->IsAssociativeArray()) + tree2_mapped_nodes_size = tree2->GetMappedChildNodesReference().size(); + else if(!tree2->IsImmediate()) + tree2_ordered_nodes_size = tree2->GetOrderedChildNodesReference().size(); + + if(tree1_ordered_nodes_size == 0 && tree2_ordered_nodes_size == 0 + && tree1_mapped_nodes_size == 0 && tree2_mapped_nodes_size == 0) + { + memoized.emplace(std::make_pair(tree1, tree2), commonality); + return commonality; + } + + if(checked != nullptr) + { + //remember that it has already checked when traversing tree, and then remove from checked at the end of the function + checked->insert(tree1); + checked->insert(tree2); + } + + if(tree1_ordered_nodes_size > 0 && tree2_ordered_nodes_size > 0) + { + auto iocnt = GetInstructionOrderedChildNodeType(tree1->GetType()); + + //if there's only one node in each, then just use OCNT_POSITION because + // it's more efficient and the pairing doesn't matter + if(tree1_ordered_nodes_size < 2 && tree2_ordered_nodes_size < 2) + iocnt = OCNT_POSITION; + + switch(iocnt) + { + case OCNT_UNORDERED: + { + std::vector a2(tree2->GetOrderedChildNodesReference()); + + //for every element in a1, check to see if there's any in a2 + for(auto &a1_current : 
tree1->GetOrderedChildNodesReference()) + { + //find the node that best matches this one, greedily + bool best_match_found = false; + size_t best_match_index = 0; + MergeMetricResults best_match_value(0.0, tree1, tree2, false, false); + for(size_t match_index = 0; match_index < a2.size(); match_index++) + { + auto match_value = NumberOfSharedNodes(a1_current, a2[match_index], memoized, checked); + if(!best_match_found || match_value > best_match_value) + { + best_match_found = true; + best_match_value = match_value; + best_match_index = match_index; + + if(best_match_value.mustMatch || best_match_value.exactMatch) + break; + } + } + + //if found a match, then remove it from the match list and put it in the list + if(best_match_found) + { + //count this for whatever match it is + commonality += best_match_value; + + a2.erase(begin(a2) + best_match_index); + } + } + break; + } + + case OCNT_ORDERED: + case OCNT_ONE_POSITION_THEN_ORDERED: + { + auto &ocn1 = tree1->GetOrderedChildNodesReference(); + auto &ocn2 = tree2->GetOrderedChildNodesReference(); + auto size1 = ocn1.size(); + auto size2 = ocn2.size(); + + size_t starting_index = 0; + + if(iocnt == OCNT_ONE_POSITION_THEN_ORDERED) + { + auto smallest_list_size = std::min(size1, size2); + if(smallest_list_size >= 1) + commonality += NumberOfSharedNodes(ocn1[0], ocn2[0], memoized, checked); + + starting_index = 1; + } + + FlatMatrix> sequence_commonality; + ComputeSequenceCommonalityMatrix(sequence_commonality, ocn1, ocn2, + [&memoized, checked] + (EvaluableNode *a, EvaluableNode *b) + { + return EvaluableNodeTreeManipulation::NumberOfSharedNodes(a, b, memoized, checked); + }, starting_index); + + commonality += sequence_commonality.At(size1, size2); + break; + } + + case OCNT_PAIRED: + case OCNT_ONE_POSITION_THEN_PAIRED: + { + std::vector a1(tree1->GetOrderedChildNodesReference()); + std::vector a2(tree2->GetOrderedChildNodesReference()); + + if(iocnt == OCNT_ONE_POSITION_THEN_PAIRED) + { + auto smallest_list_size = std::min(a1.size(), a2.size()); + if(smallest_list_size >= 1) + { + commonality += NumberOfSharedNodes(a1[0], a2[0], memoized, checked); + + a1.erase(begin(a1)); + a2.erase(begin(a2)); + } + } + + //for every element in a1, check to see if there's any in a2 + while(a1.size() > 0 && a2.size() > 0) + { + //find the key (even numbered) node that best matches this one, greedily + bool best_match_found = false; + size_t best_match_index = 0; + MergeMetricResults best_match_key(0.0, nullptr, nullptr, false, false); + MergeMetricResults best_match_value(0.0, nullptr, nullptr, false, false); + + for(size_t match_index = 0; match_index < a2.size(); match_index += 2) + { + auto match_key = NumberOfSharedNodes(a1[0], a2[match_index], memoized, checked); + + // key match dominates value match + if(!best_match_found || match_key > best_match_key) + { + best_match_found = true; + best_match_key = match_key; + best_match_index = match_index; + + //count the value node commonality as long as it exists and is nontrivial + if(match_key.IsNontrivialMatch() && a1.size() > 1 && a2.size() > match_index + 1) + best_match_value = NumberOfSharedNodes(a1[1], a2[match_index + 1], memoized, checked); + else + best_match_value = MergeMetricResults(0.0, nullptr, nullptr, false, false); + } + } + + //if found a match, then remove it from the match list and put it in the list + if(best_match_found) + { + //remove the key node + a2.erase(begin(a2) + best_match_index); + //also remove the value node if it exists; use same index because it will be shifted down + 
if(a2.size() > 0 && a2.size() > best_match_index) + a2.erase(begin(a2) + best_match_index); + + //count this for whatever match it is + commonality += best_match_key; + commonality += best_match_value; + } + + //remove a potential pair from the first list + a1.erase(begin(a1)); + if(a1.size() > 0) //make sure that there's a second in the pair + a1.erase(begin(a1)); + } + break; + } + + case OCNT_POSITION: + { + auto &ocn1 = tree1->GetOrderedChildNodesReference(); + auto &ocn2 = tree2->GetOrderedChildNodesReference(); + //use size of smallest list + auto smallest_list_size = std::min(ocn1.size(), ocn2.size()); + for(size_t i = 0; i < smallest_list_size; i++) + commonality += NumberOfSharedNodes(ocn1[i], ocn2[i], memoized, checked); + + break; + } + + } + } + + if(tree1_mapped_nodes_size > 0 && tree2_mapped_nodes_size > 0) + { + //use keys from first node + for(auto &[node_id, node] : tree1->GetMappedChildNodes()) + { + //skip unless both trees have the key + auto other_node = tree2->GetMappedChildNodes().find(node_id); + if(other_node == end(tree2->GetMappedChildNodes())) + continue; + + commonality += NumberOfSharedNodes(node, other_node->second, memoized, checked); + } + } + + //if not exact match of nodes and all child nodes, then check all child nodes for better submatches + if(!commonality.exactMatch) + { + if(tree1_ordered_nodes_size > 0) + { + for(auto node : tree1->GetOrderedChildNodesReference()) + { + auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked); + if(sub_match > commonality) + commonality = sub_match; + } + } + else if(tree1_mapped_nodes_size > 0) + { + for(auto &[node_id, node] : tree1->GetMappedChildNodes()) + { + auto sub_match = NumberOfSharedNodes(tree2, node, memoized, checked); + if(sub_match > commonality) + commonality = sub_match; + } + } + + if(tree2_ordered_nodes_size > 0) + { + for(auto cn : tree2->GetOrderedChildNodesReference()) + { + auto sub_match = NumberOfSharedNodes(tree1, cn, memoized, checked); + if(sub_match > commonality) + commonality = sub_match; + } + } + else if(tree2_mapped_nodes_size > 0) + { + for(auto &[node_id, node] : tree2->GetMappedChildNodes()) + { + auto sub_match = NumberOfSharedNodes(tree1, node, memoized, checked); + if(sub_match > commonality) + commonality = sub_match; + } + } + } + + if(checked != nullptr) + { + //remove from the checked list so don't block other traversals + checked->erase(tree1); + checked->erase(tree2); + } + + memoized.emplace(std::make_pair(tree1, tree2), commonality); + return commonality; +} + +bool EvaluableNodeTreeManipulation::NonCycleDoesTreeContainLabels(EvaluableNode *en) +{ + if(en->GetNumLabels() > 0) + return true; + + for(auto cn : en->GetOrderedChildNodes()) + { + if(cn == nullptr) + continue; + + if(NonCycleDoesTreeContainLabels(cn)) + return true; + } + + for(auto &[_, cn] : en->GetMappedChildNodes()) + { + if(cn == nullptr) + continue; + + if(NonCycleDoesTreeContainLabels(cn)) + return true; + } + + return false; +} + +bool EvaluableNodeTreeManipulation::DoesTreeContainLabels(EvaluableNode *en, EvaluableNode::ReferenceSetType &checked) +{ + auto [_, inserted] = checked.insert(en); + if(!inserted) + return false; + + if(en->GetNumLabels() > 0) + return true; + + for(auto cn : en->GetOrderedChildNodes()) + { + if(cn == nullptr) + continue; + + if(DoesTreeContainLabels(cn, checked)) + return true; + } + + for(auto &[_, cn] : en->GetMappedChildNodes()) + { + if(cn == nullptr) + continue; + + if(DoesTreeContainLabels(cn, checked)) + return true; + } + + return false; +} + +bool 
EvaluableNodeTreeManipulation::CollectLabelIndexesFromNormalTree(EvaluableNode *tree, Entity::LabelsAssocType &index, EvaluableNode::ReferenceSetType *checked) +{ + if(tree == nullptr) + return false; + + //attempt to insert, but if has already been checked and in checked list (circular code), then return false + if(checked != nullptr && checked->insert(tree).second == false) + return false; + + size_t num_labels = tree->GetNumLabels(); + for(size_t i = 0; i < num_labels; i++) + { + auto label_sid = tree->GetLabelStringId(i); + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + + if(label_name.size() == 0) + continue; + + //ignore labels that have a # in the beginning + if(label_name[0] == '#') + continue; + + //attempt to put the label in the index + auto [_, inserted] = index.insert(std::make_pair(label_sid, tree)); + + //if label already exists + if(!inserted) + return true; + } + + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + { + if(CollectLabelIndexesFromNormalTree(e, index, checked)) + return true; + } + } + else if(tree->IsOrderedArray()) + { + for(auto &e : tree->GetOrderedChildNodesReference()) + { + if(CollectLabelIndexesFromNormalTree(e, index, checked)) + return true; + } + } + + return false; +} + +void EvaluableNodeTreeManipulation::CollectAllLabelIndexesFromTree(EvaluableNode *tree, Entity::LabelsAssocType &index, EvaluableNode::ReferenceSetType *checked) +{ + if(tree == nullptr) + return; + + //attempt to insert, but if has already been checked and in checked list (circular code), then return false + if(checked != nullptr && checked->insert(tree).second == false) + return; + + size_t num_labels = tree->GetNumLabels(); + for(size_t i = 0; i < num_labels; i++) + { + auto label_sid = tree->GetLabelStringId(i); + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + + if(label_name.size() == 0) + continue; + + //ignore labels that have a # in the beginning + if(label_name[0] == '#') + continue; + + //attempt to put the label in the index + index.insert(std::make_pair(label_sid, tree)); + } + + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + CollectAllLabelIndexesFromTree(e, index, checked); + } + else if(tree->IsOrderedArray()) + { + for(auto &e : tree->GetOrderedChildNodesReference()) + CollectAllLabelIndexesFromTree(e, index, checked); + } +} + +bool EvaluableNodeTreeManipulation::CollectLabelIndexesFromTreeAndMakeLabelNormalizationPass(EvaluableNode *tree, Entity::LabelsAssocType &index, + EvaluableNode::ReferenceSetType &checked, EvaluableNode *&replace_tree_by) +{ + if(tree == nullptr) + return false; + + //attempt to insert, but if has already been checked and in checked list (circular code), then return false + if(checked.insert(tree).second == false) + return false; + + //if this node has any labels, insert them and check for collisions + size_t num_labels = tree->GetNumLabels(); + for(size_t i = 0; i < num_labels; i++) + { + auto label_sid = tree->GetLabelStringId(i); + const std::string &label_name = string_intern_pool.GetStringFromID(label_sid); + + if(label_name.size() == 0) + continue; + + //ignore labels that have a # in the beginning + if(label_name[0] == '#') + continue; + + //attempt to put the label in the index + const auto &[inserted_value, inserted] = index.insert(std::make_pair(label_sid, tree)); + + //if label already exists + if(!inserted) + { + replace_tree_by = inserted_value->second; + + //add any 
labels from this tree if they are not on the existing node that has the label + if(replace_tree_by != nullptr) + replace_tree_by->SetLabelsStringIds(EvaluableNodeTreeManipulation::UnionStringIDVectors(tree->GetLabelsStringIds(), replace_tree_by->GetLabelsStringIds())); + + //more than one thing points to this label + return true; + } + } + + //traverse child nodes. If find a replacement, then mark as such to return, and if need immediate replacement of a node, then do so + // continue to iterate over all children even if have a replacement, to reduce the total number of passes needed over the tree + bool had_any_replacement = false; + if(tree->IsAssociativeArray()) + { + for(auto &[_, e] : tree->GetMappedChildNodesReference()) + { + EvaluableNode *replace_node_by = nullptr; + auto replacement = CollectLabelIndexesFromTreeAndMakeLabelNormalizationPass(e, index, checked, replace_node_by); + + if(replacement) + { + had_any_replacement = true; + if(replace_node_by != nullptr) + e = replace_node_by; + } + } + } + else if(tree->IsOrderedArray()) + { + for(auto &e : tree->GetOrderedChildNodes()) + { + EvaluableNode *replace_node_by = nullptr; + bool replacement = CollectLabelIndexesFromTreeAndMakeLabelNormalizationPass(e, index, checked, replace_node_by); + + if(replacement) + { + had_any_replacement = true; + if(replace_node_by != nullptr) + e = replace_node_by; + } + } + } + + return had_any_replacement; +} + +MergeMetricResults EvaluableNodeTreeManipulation::CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2) +{ + if(n1 == nullptr && n2 == nullptr) + return MergeMetricResults(1.0, n1, n2, false, true); + + if(n1 == nullptr || n2 == nullptr) + return MergeMetricResults(0.0, n1, n2, false, false); + + size_t num_common_labels; + size_t num_unique_labels; + EvaluableNode::GetNodeCommonAndUniqueLabelCounts(n1, n2, num_common_labels, num_unique_labels); + + auto [_, commonality] = CommonalityBetweenNodeTypesAndValues(n1, n2); + + //if no labels, as is usually the case, then just address normal commonality + // and if the nodes are exactly equal + if(num_unique_labels == 0) + return MergeMetricResults(commonality, n1, n2, false, commonality == 1.0); + + return MergeMetricResults(commonality + num_common_labels, n1, n2, num_common_labels == num_unique_labels, commonality == 1.0); +} + +std::pair EvaluableNodeTreeManipulation::CommonalityBetweenNodeTypesAndValues( + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match) +{ + bool n1_null = EvaluableNode::IsNull(n1); + bool n2_null = EvaluableNode::IsNull(n2); + if(n1_null && n2_null) + return std::make_pair(n1, 1.0); + + //if either is nullptr, then use an actual EvaluableNode + if(n1 == nullptr) + n1 = &nullEvaluableNode; + if(n2 == nullptr) + n2 = &nullEvaluableNode; + + auto n1_type = n1->GetType(); + auto n2_type = n2->GetType(); + + //can have much faster and lighter computations if only checking for exact matches + if(require_exact_node_match) + { + if(n1_type != n2_type) + return std::make_pair(n1, 0.0); + + if(n1_type == ENT_NUMBER) + { + double n1_value = n1->GetNumberValueReference(); + double n2_value = n2->GetNumberValueReference(); + return std::make_pair(n1, EqualIncludingNaN(n1_value, n2_value) ? 1.0 : 0.0); + } + if(n1->IsStringValue()) + { + auto n1_sid = n1->GetStringID(); + auto n2_sid = n2->GetStringID(); + return std::make_pair(n1, n1_sid == n2_sid ? 
1.0 : 0.0); + } + return std::make_pair(n1, 1.0); + } + + //compare similar types that are not the same, or types that have immediate comparisons + //if the types are the same, it'll be caught below + switch(n1_type) + { + case ENT_SEQUENCE: + if(n2_type == ENT_PARALLEL) return std::make_pair(n1, 0.25); + if(n2_type == ENT_NULL) return std::make_pair(n2, 0.125); + if(n2_type == ENT_LIST) return std::make_pair(n2, 0.125); + break; + + case ENT_PARALLEL: + if(n2_type == ENT_SEQUENCE) return std::make_pair(n2, 0.25); + if(n2_type == ENT_NULL) return std::make_pair(n2, 0.125); + if(n2_type == ENT_LIST) return std::make_pair(n2, 0.125); + break; + + case ENT_CALL: + if(n2_type == ENT_CALL_SANDBOXED) return std::make_pair(n1, 0.25); + break; + + case ENT_CALL_SANDBOXED: + if(n2_type == ENT_CALL) return std::make_pair(n2, 0.25); + break; + + case ENT_LET: + if(n2_type == ENT_DECLARE) return std::make_pair(n2, 0.5); + break; + + case ENT_DECLARE: + if(n2_type == ENT_LET) return std::make_pair(n1, 0.5); + break; + + case ENT_REDUCE: + if(n2_type == ENT_APPLY) return std::make_pair(n1, 0.125); + break; + + case ENT_APPLY: + if(n2_type == ENT_REDUCE) return std::make_pair(n2, 0.125); + break; + + case ENT_SET: + if(n2_type == ENT_REPLACE) return std::make_pair(n2, 0.5); + break; + + case ENT_REPLACE: + if(n2_type == ENT_SET) return std::make_pair(n1, 0.5); + break; + + case ENT_ASSOC: + if(n2_type == ENT_ASSOCIATE) return std::make_pair(n1, 0.25); + break; + + case ENT_ASSOCIATE: + if(n2_type == ENT_ASSOC) return std::make_pair(n2, 0.25); + break; + + case ENT_TRUE: + if(n2_type == ENT_FALSE) return std::make_pair(n1, 0.375); + if(n2_type == ENT_NUMBER || n2_type == ENT_NULL) + { + double n2_value = EvaluableNode::ToNumber(n2); + if(n2_value) + return std::make_pair(n2, 0.875); + return std::make_pair(n2, 0.125); + } + break; + + case ENT_FALSE: + if(n2_type == ENT_TRUE) return std::make_pair(n1, 0.375); + if(n2_type == ENT_NUMBER || n2_type == ENT_NULL) + { + double n2_value = EvaluableNode::ToNumber(n2); + if(n2_value == 0.0) + return std::make_pair(n2, 0.875); + if(FastIsNaN(n2_value)) + return std::make_pair(n2, 0.5); + return std::make_pair(n2, 0.375); + } + break; + + case ENT_NULL: + if(n2_type == ENT_TRUE) return std::make_pair(n1, 0.25); + if(n2_type == ENT_FALSE) return std::make_pair(n1, 0.5); + if(n2_type == ENT_NUMBER) + { + double n2_value = EvaluableNode::ToNumber(n2); + if(n2_value == 0.0) + return std::make_pair(n2, 0.5); + if(FastIsNaN(n2_value)) + return std::make_pair(n2, 0.875); + return std::make_pair(n2, 0.375); + } + if(n2_type == ENT_SEQUENCE) return std::make_pair(n1, 0.125); + if(n2_type == ENT_PARALLEL) return std::make_pair(n1, 0.125); + if(n2_type == ENT_LIST) return std::make_pair(n1, 0.125); + break; + + case ENT_LIST: + if(n2_type == ENT_SEQUENCE) return std::make_pair(n1, 0.125); + if(n2_type == ENT_PARALLEL) return std::make_pair(n1, 0.125); + if(n2_type == ENT_NULL) return std::make_pair(n1, 0.125); + break; + + case ENT_NUMBER: + { + double n1_value = n1->GetNumberValueReference(); + + if(n2_type == ENT_TRUE) + { + if(n1_value) + return std::make_pair(n2, 0.875); + return std::make_pair(n1, 0.375); + } + + if(n2_type == ENT_FALSE) + { + if(n1_value == 0.0) + return std::make_pair(n1, 0.875); + if(FastIsNaN(n1_value)) + return std::make_pair(n1, 0.5); + return std::make_pair(n1, 0.375); + } + + if(n2_type == ENT_NULL) + { + if(n1_value == 0.0) + return std::make_pair(n1, 0.5); + if(FastIsNaN(n1_value)) + return std::make_pair(n1, 0.875); + return std::make_pair(n1, 
0.375); + } + + if(n2_type == ENT_NUMBER) + { + double n2_value = n2->GetNumberValueReference(); + if(EqualIncludingNaN(n1_value, n2_value)) + return std::make_pair(n1, 1.0); + + if(FastIsNaN(n1_value) || FastIsNaN(n2_value)) + return std::make_pair(n1, 0.25); + + double commonality = CommonalityBetweenNumbers(n1_value, n2_value); + double commonality_including_type = std::max(0.25, commonality); + + if(n1_type == ENT_NUMBER) + return std::make_pair(n1, commonality_including_type); + else + return std::make_pair(n2, commonality_including_type); + } + + if(n2_type == ENT_RAND) + return std::make_pair(n1, 0.25); + + //can't match with any other type + return std::make_pair(nullptr, 0.0); + } + + case ENT_RAND: + if(n2_type == ENT_NUMBER) + return std::make_pair(n1, 0.125); + break; + + case ENT_STRING: + if(n2_type == ENT_STRING) + { + auto n1sid = n1->GetStringID(); + auto n2sid = n2->GetStringID(); + return std::make_pair(n1, CommonalityBetweenStrings(n1sid, n2sid)); + } + + //can't match with any other type + return std::make_pair(nullptr, 0.0); + + case ENT_SYMBOL: + if(n2_type == ENT_SYMBOL) + { + if(n2->GetStringID() == n1->GetStringID()) + return std::make_pair(n1, 1.0); + else + return std::make_pair(n1, 0.25); + } + break; + + default: + break; + } + + if(n1_type == n2_type) + return std::make_pair(n1, 1.0); + + //different type, how close? + if(IsEvaluableNodeTypeQuery(n1_type) && IsEvaluableNodeTypeQuery(n2_type)) + return std::make_pair(n1, 0.25); + + //see if compatible opcode ordering + if(GetInstructionOrderedChildNodeType(n1_type) == GetInstructionOrderedChildNodeType(n2_type)) + return std::make_pair(n1, 0.125); + + return std::make_pair(nullptr, 0.0); +} + +std::string GenerateRandomString(RandomStream &rs) +{ + //make the length between 1 and 32, with a mean of 6 + int string_length = std::min(32, static_cast(rs.ExponentialRand(3.0)) + 1 + static_cast(rs.Rand() * 4)); + std::string retval; + retval.reserve(string_length); + static const std::string samples("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"); + for(int i = 0; i < string_length; i++) + { + auto sample = samples[rs.RandSize(samples.length())]; + retval.push_back(sample); + } + return retval; +} + +std::string GenerateRandomStringGivenStringSet(RandomStream &rs, std::vector &strings, double novel_chance = 0.08) +{ + if(strings.size() == 0 || rs.Rand() < novel_chance) //small but nontrivial chance of making a new string + { + std::string s = GenerateRandomString(rs); + //put the string into the list of considered strings + strings.emplace_back(s); + return s; + } + else //use randomly chosen existing string + { + size_t rand_index = rs.RandSize(strings.size()); + return std::string(strings[rand_index]); + } +} + +//helper function for EvaluableNodeTreeManipulation::MutateNode to populate immediate data +void MutateImmediateNode(EvaluableNode *n, RandomStream &rs, std::vector &strings) +{ + if(DoesEvaluableNodeTypeUseNumberData(n->GetType())) + { + double cur_value = n->GetNumberValue(); + + //if it's a NaN, then sometimes randomly replace it with a non-null value (which can be mutated further below) + if(FastIsNaN(cur_value) && rs.Rand() < 0.9) + cur_value = rs.Rand(); + + //50% chance of being negative if negative, 50% of that 50% if positive (minimizing assumptions - a number can be either) + bool is_negative = (cur_value < 0.0); + bool new_number_negative = (rs.Rand() < (is_negative ? 
0.5 : 0.25)); + double new_value = rs.ExponentialRand(fabs(cur_value)); + + //chance to keep it an integer if it is already an integer + double int_part; + bool is_integer = (std::modf(cur_value, &int_part) == 0.0); + if(is_integer && (rs.Rand() < 0.5)) + new_value = std::round(new_value); + + if(rs.Rand() < 0.01) + { + if(rs.Rand() < 0.5) + new_value = std::numeric_limits::infinity(); + else + new_value = std::numeric_limits::quiet_NaN(); + } + + n->SetNumberValue((new_number_negative ? -1 : 1) * new_value); + } + else if(DoesEvaluableNodeTypeUseStringData(n->GetType())) + { + n->SetStringValue(GenerateRandomStringGivenStringSet(rs, strings)); + } +} + +EvaluableNode *EvaluableNodeTreeManipulation::MutateNode(EvaluableNode *n, MutationParameters &mp) +{ + if(n == nullptr) + return nullptr; + + //if immediate type (after initial mutation), see if should mutate value + bool is_immediate = n->IsImmediate(); + if(is_immediate) + { + if(mp.interpreter->randomStream.Rand() < 0.5) + MutateImmediateNode(n, mp.interpreter->randomStream, *mp.strings); + } + + StringInternPool::StringID mutation_type = mp.randMutationType->WeightedDiscreteRand(mp.interpreter->randomStream); + //only mark for likely deletion if null has no parameters + if(n->GetType() == ENT_NULL && n->GetOrderedChildNodes().size() == 0 && n->GetMappedChildNodes().size() && mp.interpreter->randomStream.Rand() < 0.5) + mutation_type = ENBISI_delete; + + //if immediate, can't perform most of the mutations, just mutate it + if(is_immediate && (mutation_type != ENBISI_change_label && mutation_type != ENBISI_change_type)) + mutation_type = ENBISI_change_type; + + switch(mutation_type) + { + case ENBISI_change_type: + n->SetType(mp.randEvaluableNodeType->WeightedDiscreteRand(mp.interpreter->randomStream), mp.enm); + if(IsEvaluableNodeTypeImmediate(n->GetType())) + MutateImmediateNode(n, mp.interpreter->randomStream, *mp.strings); + break; + + case ENBISI_delete: + if(n->GetOrderedChildNodes().size() > 0) + { + size_t num_children = n->GetOrderedChildNodes().size(); + size_t replace_with = mp.interpreter->randomStream.RandSize(num_children); + n = mp.enm->AllocNode(n->GetOrderedChildNodes()[replace_with]); + } + else if(n->GetMappedChildNodes().size() > 0) + { + size_t num_children = n->GetMappedChildNodes().size(); + double replace_with = mp.interpreter->randomStream.Rand() * num_children; + //iterate over child nodes until find the right index + for(auto &[_, cn] : n->GetMappedChildNodes()) + { + if(replace_with < 1.0) + { + n = mp.enm->AllocNode(cn); + break; + } + replace_with--; + } + + } + else + n->SetType(ENT_NULL, mp.enm); + break; + + case ENBISI_insert: + { + //use some heuristics to generate some random immediate value + EvaluableNode *new_node = mp.enm->AllocNode(mp.randEvaluableNodeType->WeightedDiscreteRand(mp.interpreter->randomStream)); + + //give it a respectable default before randomizing + if(DoesEvaluableNodeTypeUseNumberData(new_node->GetType())) + n->SetNumberValue(50); + if(DoesEvaluableNodeTypeUseStringData(new_node->GetType())) + n->SetStringValue("string"); + + MutateImmediateNode(n, mp.interpreter->randomStream, *mp.strings); + if(n->IsAssociativeArray()) + { + // get a random key + std::string key = GenerateRandomStringGivenStringSet(mp.interpreter->randomStream, *mp.strings); + n->SetMappedChildNode(key, new_node); + } + else + n->AppendOrderedChildNode(new_node); + break; + } + + case ENBISI_swap_elements: + if(n->GetOrderedChildNodes().size() > 0) + { + size_t num_child_nodes = 
n->GetOrderedChildNodesReference().size(); + auto first_index = mp.interpreter->randomStream.RandSize(num_child_nodes); + auto second_index = mp.interpreter->randomStream.RandSize(num_child_nodes); + std::swap(n->GetOrderedChildNodes()[first_index], n->GetOrderedChildNodes()[second_index]); + } + else if(n->GetMappedChildNodes().size() > 0) + { + size_t num_child_nodes = n->GetMappedChildNodesReference().size(); + auto first_index = mp.interpreter->randomStream.RandSize(num_child_nodes); + auto second_index = mp.interpreter->randomStream.RandSize(num_child_nodes); + + auto first_entry = begin(n->GetMappedChildNodes()); + while(first_index > 0 && first_entry != end(n->GetMappedChildNodes())) + { + first_entry++; + first_index++; + } + + auto second_entry = begin(n->GetMappedChildNodes()); + while(second_index > 0 && second_entry != end(n->GetMappedChildNodes())) + { + second_entry++; + second_index++; + } + + std::swap(first_entry->second, second_entry->second); + } + break; + + case ENBISI_deep_copy_elements: + if(n->GetOrderedChildNodes().size() > 0) + { + size_t num_children = n->GetOrderedChildNodesReference().size(); + size_t source_index = mp.interpreter->randomStream.RandSize(num_children); + size_t destination_index = mp.interpreter->randomStream.RandSize(num_children + 1); + if(destination_index >= num_children) + n->AppendOrderedChildNode(mp.enm->DeepAllocCopy(n->GetOrderedChildNodes()[source_index])); + else + n = n->GetOrderedChildNodes()[destination_index] = mp.enm->DeepAllocCopy(n->GetOrderedChildNodes()[source_index]); + } + else if(n->GetMappedChildNodes().size() > 0) + { + auto num_children = n->GetMappedChildNodesReference().size(); + size_t source_index = mp.interpreter->randomStream.RandSize(num_children); + EvaluableNode *source_node = nullptr; + size_t destination_index = mp.interpreter->randomStream.RandSize(num_children + 1); + //iterate over child nodes until find the right index + for(auto &[_, cn] : n->GetMappedChildNodes()) + { + if(source_index < 1) + { + source_node = cn; + break; + } + source_index--; + } + + for(auto &[_, cn] : n->GetMappedChildNodes()) + { + if(destination_index < 1) + { + cn = mp.enm->DeepAllocCopy(source_node); + destination_index--; + break; + } + destination_index--; + } + + //need to create a new key + if(destination_index > 0) + { + std::string new_key = GenerateRandomStringGivenStringSet(mp.interpreter->randomStream, *mp.strings, 0.6); + n->SetMappedChildNode(new_key, mp.enm->DeepAllocCopy(source_node)); + } + } + break; + + case ENBISI_delete_elements: + n->ClearOrderedChildNodes(); + n->ClearMappedChildNodes(); + break; + + case ENBISI_change_label: + //affect labels + if(n != nullptr) + { + //see if can delete a label, and delete all if the option is available and chosen to keep new label creation balanced + if(n->GetNumLabels() > 0 && mp.interpreter->randomStream.Rand() < 0.875) + { + n->ClearLabels(); + } + else + { + //add new label + std::string new_label = GenerateRandomStringGivenStringSet(mp.interpreter->randomStream, *mp.strings); + n->AppendLabel(new_label); + } + } + break; + + default: + //error, don't do anything + break; + } + + //clear excess nulls (with no child nodes) in lists + if(n != nullptr) + { + while(!n->GetOrderedChildNodes().empty() + && (n->GetOrderedChildNodes().back() == nullptr + || (n->GetOrderedChildNodes().back()->GetOrderedChildNodes().size() == 0 + && n->GetOrderedChildNodes().back()->GetMappedChildNodes().size() == 0) )) + { + //either remove this one or stop removing + 
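// (each trailing empty node is removed with probability 0.875; stop at the first one kept) +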
if(mp.interpreter->randomStream.Rand() > 0.125) + n->GetOrderedChildNodes().pop_back(); + else + break; + } + } + + return n; +} + +EvaluableNode *EvaluableNodeTreeManipulation::MutateTree(MutationParameters &mp, EvaluableNode *tree) +{ + if(tree == nullptr) + return nullptr; + + //if this object has already been copied, then just return the reference to the new copy + auto found_copy = mp.references.find(tree); + if(found_copy != end(mp.references)) + return found_copy->second; + + EvaluableNode *copy = mp.enm->AllocNode(tree); + auto node_stack = mp.interpreter->CreateInterpreterNodeStackStateSaver(copy); + + //shouldn't happen, but just to be safe + if(copy == nullptr) + return nullptr; + + if(mp.interpreter->randomStream.Rand() < mp.mutation_rate) + { + EvaluableNode *new_node = MutateNode(copy, mp); + //make sure have the right node to reference if it's a new node + if(new_node != copy) + { + copy = new_node; + + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(new_node); + } + } + + (mp.references)[tree] = copy; + + //this shouldn't happen - it should be a node of type ENT_NULL, but check just in case + if(copy == nullptr) + return nullptr; + + if(copy->IsAssociativeArray()) + { + //for any mapped children, copy and update + for(auto &[_, s] : copy->GetMappedChildNodesReference()) + { + //get current item in list + EvaluableNode *n = s; + if(n == nullptr) + continue; + + //turn into a copy and mutate + n = MutateTree(mp, n); + + //replace current item in list with copy + s = n; + } + } + else + { + //for any ordered children, copy and update + auto &ocn = copy->GetOrderedChildNodes(); + for(size_t i = 0; i < ocn.size(); i++) + { + //get current item in list + EvaluableNode *n = ocn[i]; + if(n == nullptr) + continue; + + //turn into a copy and mutate + n = MutateTree(mp, n); + + //replace current item in list with copy + ocn[i] = n; + } + } + + return copy; +} + +void EvaluableNodeTreeManipulation::ReplaceStringsInTree(EvaluableNode *tree, CompactHashMap &to_replace, EvaluableNode::ReferenceSetType &checked) +{ + if(tree == nullptr) + return; + + //try to record, but if already checked, then don't do anything + auto [_, inserted] = checked.insert(tree); + if(!inserted) + return; + + if(tree->IsAssociativeArray()) + { + for(auto &[cn_id, cn] : tree->GetMappedChildNodesReference()) + ReplaceStringsInTree(cn, to_replace, checked); + } + else if(tree->IsImmediate()) + { + if(tree->GetType() == ENT_STRING) + { + auto replacement = to_replace.find(tree->GetStringID()); + if(replacement != end(to_replace)) + tree->SetStringID(replacement->second); + } + } + else //ordered + { + for(auto cn : tree->GetOrderedChildNodesReference()) + ReplaceStringsInTree(cn, to_replace, checked); + } +} + +void EvaluableNodeTreeManipulation::GetStringsFromTree(EvaluableNode *tree, std::vector &strings, EvaluableNode::ReferenceSetType &checked) +{ + if(tree == nullptr) + return; + + //try to record, but if already checked, then don't do anything + auto [_, inserted] = checked.insert(tree); + if(!inserted) + return; + + if(tree->IsAssociativeArray()) + { + for(auto &[cn_id, cn] : tree->GetMappedChildNodesReference()) + GetStringsFromTree(cn, strings, checked); + } + else if(tree->IsImmediate()) + { + if(DoesEvaluableNodeTypeUseStringData(tree->GetType())) + strings.emplace_back(tree->GetStringValue()); + } + else //ordered + { + for(auto &cn : tree->GetOrderedChildNodesReference()) + GetStringsFromTree(cn, strings, checked); + } +} + +#if defined(MULTITHREAD_SUPPORT) || 
defined(MULTITHREAD_INTERFACE) +thread_local std::vector EvaluableNodeTreeManipulation::aCharsBuffer; +thread_local std::vector EvaluableNodeTreeManipulation::bCharsBuffer; +thread_local FlatMatrix EvaluableNodeTreeManipulation::sequenceCommonalityBuffer; +#else +std::vector EvaluableNodeTreeManipulation::aCharsBuffer; +std::vector EvaluableNodeTreeManipulation::bCharsBuffer; +FlatMatrix EvaluableNodeTreeManipulation::sequenceCommonalityBuffer; +#endif + +EvaluableNode EvaluableNodeTreeManipulation::nullEvaluableNode(ENT_NULL); + +CompactHashMap EvaluableNodeTreeManipulation::mutationOperationTypeProbabilities +{ + { ENBISI_change_type, 0.28 }, + { ENBISI_delete, 0.12 }, + { ENBISI_insert, 0.23 }, + { ENBISI_swap_elements, 0.24 }, + { ENBISI_deep_copy_elements,0.05 }, + { ENBISI_delete_elements, 0.04 }, + { ENBISI_change_label, 0.04 } +}; + +EvaluableNodeTreeManipulation::MutationParameters::WeightedRandMutationType EvaluableNodeTreeManipulation::mutationOperationTypeRandomStream(mutationOperationTypeProbabilities, true); + +CompactHashMap EvaluableNodeTreeManipulation::evaluableNodeTypeProbabilities +{ + //built-in / system specific + {ENT_SYSTEM, 0.05}, + {ENT_GET_DEFAULTS, 0.01}, + + //parsing + {ENT_PARSE, 0.05}, + {ENT_UNPARSE, 0.05}, + + //core control + {ENT_IF, 1.0}, + {ENT_SEQUENCE, 0.5}, + {ENT_PARALLEL, 0.5}, + {ENT_LAMBDA, 1.5}, + {ENT_CONCLUDE, 0.05}, + {ENT_CALL, 1.5}, + {ENT_CALL_SANDBOXED, 0.25}, + {ENT_WHILE, 0.1}, + + //definitions + {ENT_LET, 0.95}, + {ENT_DECLARE, 0.5}, + {ENT_ASSIGN, 0.95}, + {ENT_ACCUM, 0.25}, + {ENT_RETRIEVE, 0.1}, + + //retrieval + {ENT_GET, 3.0}, + {ENT_SET, 0.35}, + {ENT_REPLACE, 0.1}, + + //stack and node manipulation + {ENT_TARGET, 0.1}, + {ENT_TARGET_INDEX, 0.1}, + {ENT_TARGET_VALUE, 0.1}, + {ENT_STACK, 0.05}, + {ENT_ARGS, 0.08}, + + //simulation and operations + {ENT_RAND, 0.4}, + {ENT_WEIGHTED_RAND, 0.02}, + {ENT_GET_RAND_SEED, 0.02}, + {ENT_SET_RAND_SEED, 0.02}, + {ENT_SYSTEM_TIME, 0.01}, + + //base math + {ENT_ADD, 0.9}, + {ENT_SUBTRACT, 0.65}, + {ENT_MULTIPLY, 0.65}, + {ENT_DIVIDE, 0.6}, + {ENT_MODULUS, 0.2}, + {ENT_GET_DIGITS, 0.1}, + {ENT_SET_DIGITS, 0.1}, + {ENT_FLOOR, 0.6}, + {ENT_CEILING, 0.6}, + {ENT_ROUND, 0.6}, + + //extended math + {ENT_EXPONENT, 0.4}, + {ENT_LOG, 0.4}, + + {ENT_SIN, 0.2}, + {ENT_ASIN, 0.2}, + {ENT_COS, 0.2}, + {ENT_ACOS, 0.2}, + {ENT_TAN, 0.2}, + {ENT_ATAN, 0.2}, + + {ENT_SINH, 0.07}, + {ENT_ASINH, 0.07}, + {ENT_COSH, 0.07}, + {ENT_ACOSH, 0.07}, + {ENT_TANH, 0.07}, + {ENT_ATANH, 0.07}, + + {ENT_ERF, 0.05}, + {ENT_TGAMMA, 0.07}, + {ENT_LGAMMA, 0.07}, + + {ENT_SQRT, 0.2}, + {ENT_POW, 0.2}, + {ENT_ABS, 0.4}, + {ENT_MAX, 0.4}, + {ENT_MIN, 0.4}, + {ENT_DOT_PRODUCT, 0.2}, + {ENT_GENERALIZED_DISTANCE, 0.15}, + + //list manipulation + {ENT_FIRST, 0.65}, + {ENT_TAIL, 0.65}, + {ENT_LAST, 0.65}, + {ENT_TRUNC, 0.65}, + {ENT_APPEND, 0.65}, + {ENT_SIZE, 0.6}, + {ENT_RANGE, 0.5}, + + //transformation + {ENT_REWRITE, 0.1}, + {ENT_MAP, 1.1}, + {ENT_FILTER, 0.5}, + {ENT_WEAVE, 0.2}, + {ENT_REDUCE, 0.7}, + {ENT_APPLY, 0.5}, + {ENT_REVERSE, 0.4}, + {ENT_SORT, 0.5}, + + //associative list manipulation + {ENT_INDICES, 0.5}, + {ENT_VALUES, 0.5}, + {ENT_CONTAINS_INDEX, 0.5}, + {ENT_CONTAINS_VALUE, 0.5}, + {ENT_REMOVE, 0.5}, + {ENT_KEEP, 0.5}, + {ENT_ASSOCIATE, 0.8}, + {ENT_ZIP, 0.35}, + {ENT_UNZIP, 0.25}, + + //logic + {ENT_AND, 0.75}, + {ENT_OR, 0.75}, + {ENT_XOR, 0.75}, + {ENT_NOT, 0.75}, + + //equivalence + {ENT_EQUAL, 1.2}, + {ENT_NEQUAL, 0.65}, + {ENT_LESS, 0.85}, + {ENT_LEQUAL, 0.85}, + {ENT_GREATER, 0.85}, + {ENT_GEQUAL, 0.85}, + 
{ENT_TYPE_EQUALS, 0.1}, + {ENT_TYPE_NEQUALS, 0.1}, + + //built-in constants and variables + {ENT_TRUE, 0.1}, + {ENT_FALSE, 0.1}, + {ENT_NULL, 0.75}, + + //data types + {ENT_LIST, 2.5}, + {ENT_ASSOC, 3.0}, + {ENT_NUMBER, 8.0}, + {ENT_STRING, 4.0}, + {ENT_SYMBOL, 25.0}, + + //node types + {ENT_GET_TYPE, 0.25}, + {ENT_GET_TYPE_STRING, 0.25}, + {ENT_SET_TYPE, 0.35}, + {ENT_FORMAT, 0.05}, + + //labels and comments + {ENT_GET_LABELS, 0.1}, + {ENT_GET_ALL_LABELS, 0.05}, + {ENT_SET_LABELS, 0.1}, + {ENT_ZIP_LABELS, 0.02}, + + {ENT_GET_COMMENTS, 0.05}, + {ENT_SET_COMMENTS, 0.05}, + + {ENT_GET_CONCURRENCY, 0.01}, + {ENT_SET_CONCURRENCY, 0.01}, + + {ENT_GET_VALUE, 0.15}, + {ENT_SET_VALUE, 0.15}, + + //string + {ENT_EXPLODE, 0.02}, + {ENT_SPLIT, 0.2}, + {ENT_SUBSTR, 0.2}, + {ENT_CONCAT, 0.2}, + + //encryption + {ENT_CRYPTO_SIGN, 0.01}, + {ENT_CRYPTO_SIGN_VERIFY, 0.01}, + {ENT_ENCRYPT, 0.01}, + {ENT_DECRYPT, 0.01}, + + //I/O + {ENT_PRINT, 0.01}, + + //tree merging + {ENT_TOTAL_SIZE, 0.2}, + {ENT_MUTATE, 0.2}, + {ENT_COMMONALITY, 0.2}, + {ENT_EDIT_DISTANCE, 0.2}, + {ENT_INTERSECT, 0.2}, + {ENT_UNION, 0.2}, + {ENT_DIFFERENCE, 0.2}, + {ENT_MIX, 0.2}, + {ENT_MIX_LABELS, 0.2}, + + //entity merging + {ENT_TOTAL_ENTITY_SIZE, 0.02}, + {ENT_FLATTEN_ENTITY, 0.02}, + {ENT_MUTATE_ENTITY, 0.02}, + {ENT_COMMONALITY_ENTITIES, 0.02}, + {ENT_EDIT_DISTANCE_ENTITIES, 0.02}, + {ENT_INTERSECT_ENTITIES, 0.02}, + {ENT_UNION_ENTITIES, 0.02}, + {ENT_DIFFERENCE_ENTITIES, 0.02}, + {ENT_MIX_ENTITIES, 0.02}, + + //entity details + {ENT_GET_ENTITY_COMMENTS, 0.01}, + {ENT_RETRIEVE_ENTITY_ROOT, 0.01}, + {ENT_ASSIGN_ENTITY_ROOTS, 0.01}, + {ENT_ACCUM_ENTITY_ROOTS, 0.01}, + {ENT_GET_ENTITY_RAND_SEED, 0.01}, + {ENT_SET_ENTITY_RAND_SEED, 0.01}, + {ENT_GET_ENTITY_ROOT_PERMISSION, 0.01}, + {ENT_SET_ENTITY_ROOT_PERMISSION, 0.01}, + + //entity base actions + {ENT_CREATE_ENTITIES, 0.1}, + {ENT_CLONE_ENTITIES, 0.1}, + {ENT_MOVE_ENTITIES, 0.15}, + {ENT_DESTROY_ENTITIES, 0.1}, + {ENT_LOAD, 0.01}, + {ENT_LOAD_ENTITY, 0.01}, + {ENT_LOAD_PERSISTENT_ENTITY, 0.01}, + {ENT_STORE, 0.01}, + {ENT_STORE_ENTITY, 0.01}, + {ENT_CONTAINS_ENTITY, 0.1}, + + //entity query + {ENT_CONTAINED_ENTITIES, 0.3}, + {ENT_COMPUTE_ON_CONTAINED_ENTITIES, 0.3}, + {ENT_QUERY_SELECT, 0.2}, + {ENT_QUERY_SAMPLE, 0.2}, + {ENT_QUERY_WEIGHTED_SAMPLE, 0.2}, + {ENT_QUERY_IN_ENTITY_LIST, 0.2}, + {ENT_QUERY_NOT_IN_ENTITY_LIST, 0.2}, + {ENT_QUERY_COUNT, 0.2}, + {ENT_QUERY_EXISTS, 0.2}, + {ENT_QUERY_NOT_EXISTS, 0.2}, + {ENT_QUERY_EQUALS, 0.2}, + {ENT_QUERY_NOT_EQUALS, 0.2}, + {ENT_QUERY_BETWEEN, 0.2}, + {ENT_QUERY_NOT_BETWEEN, 0.2}, + {ENT_QUERY_AMONG, 0.2}, + {ENT_QUERY_NOT_AMONG, 0.2}, + {ENT_QUERY_MAX, 0.2}, + {ENT_QUERY_MIN, 0.2}, + {ENT_QUERY_SUM, 0.2}, + {ENT_QUERY_MODE, 0.2}, + {ENT_QUERY_QUANTILE, 0.2}, + {ENT_QUERY_GENERALIZED_MEAN, 0.2}, + {ENT_QUERY_MIN_DIFFERENCE, 0.2}, + {ENT_QUERY_MAX_DIFFERENCE, 0.2}, + {ENT_QUERY_VALUE_MASSES, 0.2}, + {ENT_QUERY_GREATER_OR_EQUAL_TO, 0.2}, + {ENT_QUERY_LESS_OR_EQUAL_TO, 0.2}, + {ENT_QUERY_WITHIN_GENERALIZED_DISTANCE, 0.2}, + {ENT_QUERY_NEAREST_GENERALIZED_DISTANCE, 0.2}, + + {ENT_COMPUTE_ENTITY_CONVICTIONS, 0.2}, + {ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE, 0.2}, + {ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS, 0.2}, + {ENT_COMPUTE_ENTITY_KL_DIVERGENCES, 0.2}, + + //entity access + {ENT_CONTAINS_LABEL, 0.5}, + {ENT_ASSIGN_TO_ENTITIES, 0.5}, + {ENT_DIRECT_ASSIGN_TO_ENTITIES, 0.01}, + {ENT_ACCUM_TO_ENTITIES, 0.5}, + {ENT_RETRIEVE_FROM_ENTITY, 0.5}, + {ENT_DIRECT_RETRIEVE_FROM_ENTITY, 0.01}, + {ENT_CALL_ENTITY, 0.5}, + {ENT_CALL_ENTITY_GET_CHANGES, 
0.05}, + {ENT_CALL_CONTAINER, 0.5} +}; + +EvaluableNodeTreeManipulation::MutationParameters::WeightedRandEvaluableNodeType EvaluableNodeTreeManipulation::evaluableNodeTypeRandomStream(evaluableNodeTypeProbabilities, true); diff --git a/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h new file mode 100644 index 00000000..89a56006 --- /dev/null +++ b/src/Amalgam/evaluablenode/EvaluableNodeTreeManipulation.h @@ -0,0 +1,590 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" +#include "HashMaps.h" +#include "Interpreter.h" +#include "Merger.h" +#include "WeightedDiscreteRandomStream.h" + +//system headers: +#include +#include +#include +#include +#include + +//Functor to transform EvaluableNode into doubles +class EvaluableNodeAsDouble +{ +public: + inline double operator()(EvaluableNode *en) + { + return EvaluableNode::ToNumber(en); + } +}; + +//hashing for pairs of pointers +template +struct std::hash> +{ + inline size_t operator()(std::pair const &pointer_pair) const + { + size_t h1 = std::hash{}(pointer_pair.first); + size_t h2 = std::hash{}(pointer_pair.second); + return h1 ^ (h2 << 1); + } +}; + +//equality for pairs of pointers +template +constexpr bool operator==(const std::pair &a, const std::pair &b) +{ + return a.first == b.first && a.second == b.second; +} + +//for caching pairs of EvaluableNode *'s into MergeMetricResults +typedef FastHashMap, MergeMetricResults> MergeMetricResultsCache; + +//returns a commonality measure of difference between numbers a and b in [0,1] +//if the numbers are equal, returns 1, returns closer to 0 the less similar they are +inline double NumberCommonality(double difference, double a, double b) +{ + double max_abs = std::max(std::fabs(a), std::fabs(b)); + //since this is called frequently in comparing and merging, and perfect accuracy isn't required, just use fast version + double difference_commonality = FastExp(-difference / max_abs); + return difference_commonality; +} + +//for random streams that are based on an EvaluableNode MappedChildNodes +typedef WeightedDiscreteRandomStreamTransform + EvaluableNodeMappedWeightedDiscreteRandomStreamTransform; + +class EvaluableNodeTreeManipulation +{ +public: + class MutationParameters + { + public: + typedef WeightedDiscreteRandomStreamTransform> WeightedRandEvaluableNodeType; + + typedef WeightedDiscreteRandomStreamTransform> WeightedRandMutationType; + + Interpreter *interpreter; + EvaluableNodeManager *enm; + double mutation_rate; + std::vector *strings; + EvaluableNode::ReferenceAssocType references; + WeightedRandEvaluableNodeType *randEvaluableNodeType; + WeightedRandMutationType *randMutationType; + + MutationParameters(Interpreter *interpreter, + EvaluableNodeManager *enm, + double mutation_rate, + std::vector *strings, + WeightedRandEvaluableNodeType *rand_operation, + WeightedRandMutationType *rand_operation_type) : + interpreter(nullptr), + enm(nullptr), + mutation_rate(0), + strings(nullptr), + references(EvaluableNode::ReferenceAssocType()), + randEvaluableNodeType(&evaluableNodeTypeRandomStream), + randMutationType(&mutationOperationTypeRandomStream) + { + this->interpreter = interpreter; + this->enm = enm; + this->mutation_rate = mutation_rate; + this->strings = strings; + this->randEvaluableNodeType = rand_operation; + this->randMutationType = rand_operation_type; + } + }; + + static CompactHashMap mutationOperationTypeProbabilities; + static CompactHashMap 
evaluableNodeTypeProbabilities; + + //functionality to merge two nodes + class NodesMergeMethod : public Merger + { + public: + NodesMergeMethod(EvaluableNodeManager *_enm, + bool keep_all_of_both, bool require_exact_matches) + : enm(_enm), keepAllOfBoth(keep_all_of_both), + requireExactMatches(require_exact_matches) + { } + + virtual MergeMetricResults MergeMetric(EvaluableNode *a, EvaluableNode *b) + { + if((a != nullptr && a->GetNeedCycleCheck()) || (b != nullptr && b->GetNeedCycleCheck())) + { + EvaluableNode::ReferenceSetType checked; + return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, &checked); + } + else //don't need to check for cycles + { + return NumberOfSharedNodes(a, b, memoizedMergeMetricResults, nullptr); + } + } + + virtual EvaluableNode *MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge = false) + { return MergeTrees(this, a, b); } + + virtual bool KeepAllNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepSomeNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableValue() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableAInsteadOfB() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableA() + { return keepAllOfBoth; } + virtual bool KeepNonMergeableB() + { return keepAllOfBoth; } + + virtual bool AreMergeable(EvaluableNode *a, EvaluableNode *b); + + virtual EvaluableNode::ReferenceAssocType &GetReferences() + { return references; } + + constexpr bool RequireExactMatches() + { return requireExactMatches; } + + //use for allocating + EvaluableNodeManager *enm; + + protected: + bool keepAllOfBoth; + bool requireExactMatches; + EvaluableNode::ReferenceAssocType references; + MergeMetricResultsCache memoizedMergeMetricResults; + }; + + //functionality to mix nodes + class NodesMixMethod : public NodesMergeMethod + { + public: + NodesMixMethod(RandomStream random_stream, EvaluableNodeManager *_enm, + double fraction_a, double fraction_b, double similar_mix_chance); + + virtual EvaluableNode *MergeValues(EvaluableNode *a, EvaluableNode *b, bool must_merge = false); + + virtual bool KeepAllNonMergeableValues() + { return false; } + + virtual bool KeepSomeNonMergeableValues() + { return true; } + + virtual bool KeepNonMergeableValue() + { + return randomStream.Rand() < fractionAOrB; + } + + virtual bool KeepNonMergeableAInsteadOfB() + { + return randomStream.Rand() < fractionAInsteadOfB; + } + + virtual bool KeepNonMergeableA() + { + return randomStream.Rand() < fractionA; + } + virtual bool KeepNonMergeableB() + { + return randomStream.Rand() < fractionB; + } + + virtual bool AreMergeable(EvaluableNode *a, EvaluableNode *b); + + protected: + + RandomStream randomStream; + + double fractionA; + double fractionB; + double fractionAOrB; + double fractionAInsteadOfB; + double similarMixChance; + }; + + //functionality to merge sequences of strings (e.g., for comments) + class StringSequenceMergeMetric : public Merger + { + public: + constexpr StringSequenceMergeMetric(bool keep_all_of_both) + : keepAllOfBoth(keep_all_of_both) + { } + + virtual MergeMetricResults MergeMetric(std::string *a, std::string *b); + + virtual std::string *MergeValues(std::string *a, std::string *b, bool must_merge = false); + + virtual bool KeepAllNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepSomeNonMergeableValues() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableValue() + { return keepAllOfBoth; } + + virtual bool KeepNonMergeableAInsteadOfB() + { return keepAllOfBoth; } + + virtual 
bool KeepNonMergeableA() + { return keepAllOfBoth; } + virtual bool KeepNonMergeableB() + { return keepAllOfBoth; } + + virtual bool AreMergeable(std::string *a, std::string *b) + { + if(a == b) + return true; + return (a != nullptr && b != nullptr && *a == *b); + } + + protected: + bool keepAllOfBoth; + }; + + //functionality to mix utf-8 strings + class StringsMixMethodUtf8 : public Merger + { + public: + StringsMixMethodUtf8(RandomStream random_stream, double fraction_a, double fraction_b); + + virtual MergeMetricResults MergeMetric(uint32_t a, uint32_t b) + { + if(a == b) + return MergeMetricResults(1.0, a, b); + else + return MergeMetricResults(0.0, a, b); + } + + virtual uint32_t MergeValues(uint32_t a, uint32_t b, bool must_merge = false) + { + if(b == 0) + return a; + if(a == 0) + return b; + + if(KeepNonMergeableAInsteadOfB()) + return a; + return b; + } + + virtual bool KeepAllNonMergeableValues() + { return false; } + + virtual bool KeepSomeNonMergeableValues() + { return true; } + + virtual bool KeepNonMergeableValue() + { + return randomStream.Rand() < fractionAOrB; + } + + virtual bool KeepNonMergeableAInsteadOfB() + { + return randomStream.Rand() < fractionAInsteadOfB; + } + + virtual bool KeepNonMergeableA() + { + return randomStream.Rand() < fractionA; + } + virtual bool KeepNonMergeableB() + { + return randomStream.Rand() < fractionB; + } + + virtual bool AreMergeable(uint32_t a, uint32_t b) + { return a == b; } + + protected: + RandomStream randomStream; + + double fractionA; + double fractionB; + double fractionAOrB; + double fractionAInsteadOfB; + double similarMixChance; + }; + + //Tree and string merging functions + static EvaluableNode *IntersectTrees(EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2); + + static EvaluableNode *UnionTrees(EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2); + + static EvaluableNode *MixTrees(RandomStream random_stream, EvaluableNodeManager *enm, EvaluableNode *tree1, EvaluableNode *tree2, + double fraction_a, double fraction_b, double similar_mix_chance); + + static EvaluableNode *MixTreesByCommonLabels(Interpreter *interpreter, EvaluableNodeManager *enm, + EvaluableNodeReference tree1, EvaluableNodeReference tree2, RandomStream &rs, double fraction_a, double fraction_b); + + static std::string MixStrings(const std::string &a, const std::string &b, RandomStream random_stream, double fraction_a, double fraction_b); + + //returns a number between 0 and 1, where 1 is exactly the same and 0 is maximally different + static inline double CommonalityBetweenNumbers(double n1, double n2) + { + if(EqualIncludingNaN(n1, n2)) + return 1.0; + + double commonality = NumberCommonality(std::fabs(n1 - n2), n1, n2); + return commonality; + } + + //returns the commonality between two strings that are different + static constexpr double CommonalityBetweenStrings(StringInternPool::StringID sid1, StringInternPool::StringID sid2) + { + if(sid1 == sid2) + return 1.0; + + //if either is not a string, then maximal non-matchage + if(sid1 == string_intern_pool.NOT_A_STRING_ID || sid2 == string_intern_pool.NOT_A_STRING_ID) + return 0.125; + + const auto &s1 = string_intern_pool.GetStringFromID(sid1); + const auto &s2 = string_intern_pool.GetStringFromID(sid2); + + size_t s1_len = 0; + size_t s2_len = 0; + size_t difference = EditDistance(s1, s2, s1_len, s2_len); + + double commonality = NumberCommonality(static_cast(difference), + static_cast(s1_len), static_cast(s2_len)); + + return commonality; + } + + //returns the 
EditDistance between the sequences a and b using the specified sequence_commonality_buffer + template + static size_t EditDistance(std::vector &a, std::vector &b, + FlatMatrix &sequence_commonality_buffer) + { + //if either string is empty, return the other + size_t a_size = a.size(); + size_t b_size = b.size(); + if(a_size == 0) + return b_size; + if(b_size == 0) + return a_size; + + ComputeSequenceCommonalityMatrix(sequence_commonality_buffer, a, b, + [] (ElementType a, ElementType b) + { + return (a == b ? 1 : 0); + }); + + //edit distance is the longest sequence's size minus the commonality + return std::max(a_size, b_size) - sequence_commonality_buffer.At(a_size, b_size); + } + + //returns the EditDistance between the sequences a and b + template + inline static size_t EditDistance(std::vector &a, std::vector &b) + { + FlatMatrix sequence_commonality; + return EditDistance(a, b, sequence_commonality); + } + + //computes the edit distance (Levenshtein distance) between the two utf-8 strings + inline static size_t EditDistance(const std::string &a, const std::string &b) + { + StringManipulation::ExplodeUTF8Characters(a, aCharsBuffer); + StringManipulation::ExplodeUTF8Characters(b, bCharsBuffer); + return EvaluableNodeTreeManipulation::EditDistance(aCharsBuffer, bCharsBuffer, sequenceCommonalityBuffer); + } + + //computes the edit distance (Levenshtein distance) between the two utf-8 strings + //a_size and b_size are set to the length of the strings respectively + inline static size_t EditDistance(const std::string &a, const std::string &b, + size_t &a_len, size_t &b_len) + { + StringManipulation::ExplodeUTF8Characters(a, aCharsBuffer); + a_len = aCharsBuffer.size(); + + StringManipulation::ExplodeUTF8Characters(b, bCharsBuffer); + b_len = bCharsBuffer.size(); + + return EvaluableNodeTreeManipulation::EditDistance(aCharsBuffer, bCharsBuffer, sequenceCommonalityBuffer); + } + + //computes the edit distance between the two trees + static double EditDistance(EvaluableNode *tree1, EvaluableNode *tree2) + { + auto shared_nodes = NumberOfSharedNodes(tree1, tree2); + size_t tree_1_size = EvaluableNode::GetDeepSize(tree1); + size_t tree_2_size = EvaluableNode::GetDeepSize(tree2); + + //find the distance to edit from tree1 to shared, then from shared to tree_2. Shared is the smallest, so subtract from each. 
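+	//illustrative example (hypothetical sizes, not taken from the code base): if tree1 has 10 nodes,
+	// tree2 has 7 nodes, and the shared commonality is 5, the distance is (10 - 5) + (7 - 5) = 7,
+	// i.e., the edits needed to reduce tree1 to the shared structure plus those needed to build tree2 back up from it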
+ return (tree_1_size - shared_nodes.commonality) + (tree_2_size - shared_nodes.commonality); + } + + //Computes the total number of nodes in both trees that are equal + inline static MergeMetricResults NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2) + { + MergeMetricResultsCache memoized; + if((tree1 != nullptr && tree1->GetNeedCycleCheck()) || (tree2 != nullptr && tree2->GetNeedCycleCheck())) + { + EvaluableNode::ReferenceSetType checked; + return NumberOfSharedNodes(tree1, tree2, memoized, &checked); + } + else //don't need to check for cycles + { + return NumberOfSharedNodes(tree1, tree2, memoized, nullptr); + } + } + + //Returns the total number of nodes in both trees that are equal, ignoring those in the checked set + // Assists the public function NumberOfSharedNodes + //if checked is nullptr, then it will not keep track of the nodes, which can be done if neither needs cycle checks + static MergeMetricResults NumberOfSharedNodes(EvaluableNode *tree1, EvaluableNode *tree2, + MergeMetricResultsCache &memoized, EvaluableNode::ReferenceSetType *checked); + + //returns true if the tree contains any labels + static bool DoesTreeContainLabels(EvaluableNode *en); + + //Recursively traverses tree, storing any nodes with labels into an index map, and returning the map, + // as well as a flag indicating whether the labels needed to be renormalized due to collision + //if force_normalization_on_en, it will normalize en by collapsing nodes that share the same label, to ensure that each label will only have a single node + // *Note* that force_normalization_on_en will modify en, potentially removing some nodes + // if, when normalizing, the tree is no longer cycle free, en_cycle_free will be set to false regardless of its initial value + static std::pair RetrieveLabelIndexesFromTreeAndNormalize(EvaluableNode *en); + + //like RetrieveNormalizedLabelIndexesFromTree, except collects all labels overwriting duplicates + inline static Entity::LabelsAssocType RetrieveLabelIndexesFromTree(EvaluableNode *en) + { + Entity::LabelsAssocType index; + EvaluableNode::ReferenceSetType checked; + + //can check faster if don't need to check for cycles + bool en_cycle_free = (en == nullptr || !en->GetNeedCycleCheck()); + CollectAllLabelIndexesFromTree(en, index, en_cycle_free ? nullptr : &checked); + return index; + } + + //Directly replaces all occurrences of code under label in tree (including potentially the root node) with replacement. + // replacement should be allocated by the appropriate allocator and may be modified if necessary + // for combining labels, etc. + static inline void ReplaceLabelInTree(EvaluableNode *&tree, StringInternPool::StringID label_id, EvaluableNode *replacement) + { + EvaluableNode::ReferenceSetType checked; + ReplaceLabelInTreeRecurse(tree, label_id, replacement, checked); + EvaluableNodeManager::UpdateFlagsForNodeTree(tree, checked); + } + + //If the nodes, n1 and n2 can be generalized, then returns a new (allocated) node that is preferable to use (usually the more specific one) + // If the nodes are not equivalent, then returns null + // Only extra data (labels, comments, etc.) that is common to both is kept, unless KeepAllNonMergeableValues is true. Then everything from both is kept. 
If + // KeepSomeNonMergeableValues is set and one node is null, it will return a copy of the non-null node + static EvaluableNode *CreateGeneralizedNode(NodesMergeMethod *mm, EvaluableNode *n1, EvaluableNode *n2); + + //returns the union of the two sets of labels + static std::vector UnionStringIDVectors( + const std::vector &label_list_a, const std::vector &label_list_b); + + //returs the intersection of the two sets of labels + static std::vector IntersectStringIDVectors( + const std::vector &label_list_a, const std::vector &label_list_b); + + //Returns a tree that consists of only nodes that are common across all of the trees specified, + // where all returned values are newly allocated and modifiable + static EvaluableNode *MergeTrees(NodesMergeMethod *mm, EvaluableNode *tree1, EvaluableNode *tree2); + + //Returns a tree that is a copy of tree but mutated based on mutation_rate + // will create the new tree with interpreter's evaluableNodeManager and will use interpreter's RandomStream + static EvaluableNode *MutateTree(Interpreter *interpreter, EvaluableNodeManager *enm, EvaluableNode *tree, double mutation_rate, CompactHashMap *mutation_weights, CompactHashMap *evaluable_node_weights); + + //traverses tree and replaces any string that matches a key of to_replace with the value in to_replace + static void ReplaceStringsInTree(EvaluableNode *tree, CompactHashMap &to_replace); + + //returns an EvaluableNodeType based on the probabilities specified by evaluableNodeTypeRandomStream + static EvaluableNodeType GetRandomEvaluableNodeType(RandomStream *rs); + +protected: + + //recursive helper function for DoesTreeContainLabels + static bool NonCycleDoesTreeContainLabels(EvaluableNode *en); + + //recursive helper function for DoesTreeContainLabels + static bool DoesTreeContainLabels(EvaluableNode *en, EvaluableNode::ReferenceSetType &checked); + + //Recursively traverses tree, storing any nodes with labels into index. + // If checked is not nullptr, then it keeps track of previously visited nodes in checked, ignoring them if they are already in the set, and adds them to checked when they are traversed. + // checked should only be nullptr when tree is known to be cycle free + //If there is a label collision, meaning the same label is used by more than one node, then it will exit early (not populating the index) and return true + //Returns false if no collision and nothing further is needed to be done. + static bool CollectLabelIndexesFromNormalTree(EvaluableNode *tree, Entity::LabelsAssocType &index, EvaluableNode::ReferenceSetType *checked); + + //like CollectLabelIndexesFromNormalTree but overwrites duplicate labels + static void CollectAllLabelIndexesFromTree(EvaluableNode *tree, Entity::LabelsAssocType &index, EvaluableNode::ReferenceSetType *checked); + + //Recursively traverses tree, storing any nodes with labels into index. Ignores any nodes already in checked, and adds them to checked when they are traversed. + // if the current top of the tree contains a label and should be replaced by something that exists in index, it will set replace_tree_by to the proper + // node that should replace the top of the tree. 
+ //returns two boolean value if any have been replaced, meaning another pass must be made + // and the tree needs to be updated with regard to cycle checks + static bool CollectLabelIndexesFromTreeAndMakeLabelNormalizationPass(EvaluableNode *tree, Entity::LabelsAssocType &index, + EvaluableNode::ReferenceSetType &checked, EvaluableNode *&replace_tree_by); + + //recursive helper function for ReplaceLabelInTree + static void ReplaceLabelInTreeRecurse(EvaluableNode *&tree, StringInternPool::StringID label_id, + EvaluableNode *replacement, EvaluableNode::ReferenceSetType &checked); + + //Evaluates commonality metric between the two nodes passed in, including labels. 1.0 if identical, 0.0 if completely different, and some value between if similar + static MergeMetricResults CommonalityBetweenNodes(EvaluableNode *n1, EvaluableNode *n2); + + //Evaluates the functional commonality between the types and immediate values of n1 and n2 (excluding labels, comments, etc.) + // Returns a pair: the first value is the more general of the two nodes and the second is a commonality value + // The more general of the two nodes will be the one whose type is more general + // The commonality metric will return 1.0 if identical, 0.0 if completely different, and some value between if similar + // If require_exact_node_match is true, then it will only return 1.0 or 0.0 + //The EvaluableNode * returned should not be modified, nor should it be included in any data outside the scope of the caller + static std::pair CommonalityBetweenNodeTypesAndValues( + EvaluableNode *n1, EvaluableNode *n2, bool require_exact_node_match = false); + + //Mutates the current node n, changing its type or value, based on the mutation_rate + // strings contains a list of strings to likely choose from if mutating to a string value + // returns the new value, which may be n, a modification of n, or an entirely different node + static EvaluableNode *MutateNode(EvaluableNode *n, MutationParameters &mp); + + //random stream for EvaluableNodeType, so can obtain a random type from a useful distribution + static MutationParameters::WeightedRandEvaluableNodeType evaluableNodeTypeRandomStream; + + //Recursively creates a new tree using enm which is a copy of tree, but given a mutation_rate + // will create the new tree with interpreter's evaluableNodeManager + // strings is a list of strings to choose from when mutating and adding new strings + static EvaluableNode *MutateTree(MutationParameters &mp, EvaluableNode *tree); + + //traverses tree and replaces any string that matches a key of to_replace with the value in to_replace + static void ReplaceStringsInTree(EvaluableNode *tree, CompactHashMap &to_replace, EvaluableNode::ReferenceSetType &checked); + + //returns a set of strings that have appeared at least once in the given tree + static void GetStringsFromTree(EvaluableNode *tree, std::vector &strings, EvaluableNode::ReferenceSetType &checked); + + //random stream for MutationOperationType, so can obtain a random type from a useful distribution + static MutationParameters::WeightedRandMutationType mutationOperationTypeRandomStream; + + //reusable buffers for string distance and mixing +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + thread_local static std::vector aCharsBuffer; + thread_local static std::vector bCharsBuffer; + thread_local static FlatMatrix sequenceCommonalityBuffer; +#else + static std::vector aCharsBuffer; + static std::vector bCharsBuffer; + static FlatMatrix sequenceCommonalityBuffer; +#endif + + //used 
by CommonalityBetweenNodeTypesAndValues for returning a null + static EvaluableNode nullEvaluableNode; +}; diff --git a/src/Amalgam/importexport/FileSupportCSV.cpp b/src/Amalgam/importexport/FileSupportCSV.cpp new file mode 100644 index 00000000..657b4fca --- /dev/null +++ b/src/Amalgam/importexport/FileSupportCSV.cpp @@ -0,0 +1,210 @@ +//project headers: +#include "FileSupportCSV.h" + +//system headers: +#include +#include +#include + +EvaluableNode *FileSupportCSV::Load(const std::string &resource_path, EvaluableNodeManager *enm) +{ + auto [data, data_success] = Platform_OpenFileAsString(resource_path); + if(!data_success) + { + std::cerr << data << std::endl; + return EvaluableNodeReference::Null(); + } + + size_t file_size = data.size(); + + //check for byte order mark for UTF-8 that may optionally appear at the beginning of the file. + // If it is present, remove it. No other encoding standards besides ascii and UTF-8 are currently permitted. + if(file_size >= 3) + { + if(static_cast(data[0]) == 0xEF && static_cast(data[1]) == 0xBB && static_cast(data[2]) == 0xBF) + { + data.erase(0, 3); + file_size -= 3; + } + } + + EvaluableNode *data_top_node = enm->AllocNode(ENT_LIST); + + //position in the input + size_t cur_position = 0; + + //for each row + while(cur_position < file_size) + { + EvaluableNode *cur_row = enm->AllocNode(ENT_LIST); + data_top_node->GetOrderedChildNodes().push_back(cur_row); + + //instantiate this once so the memory can be reused and start with a reasonable string size + std::string value; + value.reserve(64); + + //for each column + while(cur_position < file_size) + { + //get this column's characters and value + size_t end_position = cur_position; + bool end_of_row = false; + value.clear(); + + //while in this column + while(end_position < file_size) + { + //if quoted string, get the rest of the quote + if(data[end_position] == '"') + { + //advance past quote + cur_position++; + end_position++; + + while(end_position < file_size) + { + //scan through everything that isn't a quote + if(data[end_position] != '"') + { + value.push_back(data[end_position]); + end_position++; + continue; + } + + //must be a quote, check for two in a row + if(end_position + 1 < file_size && data[end_position + 1] == '"') + { + value.push_back('"'); + //skip both quotes + end_position += 2; + continue; + } + + //must be a quote + end_position++; + break; + } + + //catch up the the cur_position because these characters have already been accounted for + cur_position = end_position; + } + + if(data[end_position] == ',') + break; + + if(data[end_position] == '\n' || data[end_position] == '\r') + { + end_of_row = true; + break; + } + + //keep accumulating this column + end_position++; + } + + //accumulate any remaining value + value.append(std::string(&data[cur_position], &data[end_position])); + + //move past extra terminating character if applicable + if(end_position + 1 < file_size && data[end_position] == '\r' && data[end_position + 1] == '\n') + end_position++; + //move past terminating character + end_position++; + + //create the value + EvaluableNode *element = nullptr; + if(value.size() > 0) + { + auto [float_value, success] = Platform_StringToNumber(value); + if(success) + element = enm->AllocNode(float_value); + else + element = enm->AllocNode(ENT_STRING, value); + } + cur_row->GetOrderedChildNodes().push_back(element); + + //start at next field + cur_position = end_position; + + if(end_of_row) + break; + } + } + + return data_top_node; +} + +//escapes a string per the CSV standard 
+// may return the original string +std::string EscapeCSVStringIfNeeded(std::string &s) +{ + if( s.find(',') == std::string::npos + && s.find('"') == std::string::npos + && s.find('\r') == std::string::npos + && s.find('\n') == std::string::npos) + return s; + + //need to put quotes around it and escape characters + std::string result; + result.reserve(s.size() + 2); + result.push_back('"'); + for(auto &c : s) + { + //quotes should be double quoted + if(c == '"') + result.push_back('"'); + result.push_back(c); + } + result.push_back('"'); + + return result; +} + +bool FileSupportCSV::Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm) +{ + std::ofstream outf(resource_path, std::ios::out | std::ios::binary); + if(!outf.good()) + return false; + + //data to write + std::string data_string; + + if(code != nullptr) + { + //grab rows + for(auto &row_node : code->GetOrderedChildNodes()) + { + //if nothing, skip + if(row_node == nullptr) + { + data_string.push_back('\n'); + continue; + } + + bool is_first_column = true; + for(auto &column_node : row_node->GetOrderedChildNodes()) + { + //separate fields by commas + if(!is_first_column) + data_string.push_back(','); + else //must be first column, but no longer + is_first_column = false; + + //leave nulls blank + if(EvaluableNode::IsEmptyNode(column_node)) + continue; + + std::string original_string = EvaluableNode::ToString(column_node); + std::string escaped_str = EscapeCSVStringIfNeeded(original_string); + data_string.append(escaped_str); + } + + data_string.push_back('\n'); + } + } + + outf.write(data_string.c_str(), data_string.size()); + outf.close(); + + return true; +} diff --git a/src/Amalgam/importexport/FileSupportCSV.h b/src/Amalgam/importexport/FileSupportCSV.h new file mode 100644 index 00000000..ed7b01a1 --- /dev/null +++ b/src/Amalgam/importexport/FileSupportCSV.h @@ -0,0 +1,12 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" + +class FileSupportCSV +{ +public: + static EvaluableNode *Load(const std::string &resource_path, EvaluableNodeManager *enm); + static bool Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm); +}; diff --git a/src/Amalgam/importexport/FileSupportJSON.cpp b/src/Amalgam/importexport/FileSupportJSON.cpp new file mode 100644 index 00000000..65e81ed9 --- /dev/null +++ b/src/Amalgam/importexport/FileSupportJSON.cpp @@ -0,0 +1,348 @@ +//project headers: +#include "FileSupportJSON.h" + +#include "EvaluableNodeTreeFunctions.h" +#include "FastMath.h" +#include "PlatformSpecific.h" +#include "StringManipulation.h" + +//3rd party headers: +#include "simdjson/simdjson.h" + +//system headers: +#include +#include +#include + +//per simdjson documentation, for multithreading, there should be one of these per thread +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) +thread_local +#endif +simdjson::ondemand::parser json_parser; + +//transform json to an Amalgam node tree. Only lists and assocs, and immediates are supported. 
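+//as a minimal illustration of the mapping performed below (the input is hypothetical, and the names
+// refer to the ENT_* node types rather than any Amalgam source syntax):
+//  {"x": [1, "two", true, null]}  ->  an ENT_ASSOC whose key "x" maps to an ENT_LIST containing
+//  a number node (1), an ENT_STRING ("two"), an ENT_TRUE node, and a nullptr child for the JSON null
+//a hedged usage sketch of the public entry points declared further below, assuming an
+// EvaluableNodeManager instance named enm is already available:
+//	EvaluableNode *tree = EvaluableNodeJSONTranslation::JsonToEvaluableNode(&enm, "{\"x\": [1, \"two\"]}");
+//	std::string round_tripped = EvaluableNodeJSONTranslation::EvaluableNodeToJson(tree, true);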
+EvaluableNode *JsonToEvaluableNodeRecurse(EvaluableNodeManager *enm, simdjson::ondemand::value element) +{ + switch(element.type()) + { + case simdjson::ondemand::json_type::array: + { + EvaluableNode *node = enm->AllocNode(ENT_LIST); + for(auto e : element.get_array()) + node->AppendOrderedChildNode(JsonToEvaluableNodeRecurse(enm, e.value())); + + return node; + } + + case simdjson::ondemand::json_type::object: + { + EvaluableNode *node = enm->AllocNode(ENT_ASSOC); + for(auto e : element.get_object()) + { + std::string_view key_view = e.unescaped_key(); + std::string key(key_view); + node->SetMappedChildNode(key, JsonToEvaluableNodeRecurse(enm, e.value())); + } + + return node; + } + + case simdjson::ondemand::json_type::number: + return enm->AllocNode(element.get_double()); + + case simdjson::ondemand::json_type::string: + { + std::string_view str_view = element.get_string(); + std::string str(str_view); + return enm->AllocNode(ENT_STRING, str); + } + + case simdjson::ondemand::json_type::boolean: + if(element.get_bool()) + return enm->AllocNode(ENT_TRUE); + else + return enm->AllocNode(ENT_FALSE); + + case simdjson::ondemand::json_type::null: + return nullptr; + } + + return nullptr; +} + +//escapes str with json standards and appends to json_str +inline void EscapeAndAppendStringToJsonString(const std::string &str, std::string &json_str) +{ + json_str += '"'; + + for(size_t i = 0; i < str.size(); i++) + { + auto c = str[i]; + switch(c) + { + case '"': json_str += "\\\""; break; + case '\\': json_str += "\\\\"; break; + case '\b': json_str += "\\b"; break; + case '\f': json_str += "\\f"; break; + case '\n': json_str += "\\n"; break; + case '\r': json_str += "\\r"; break; + case '\t': json_str += "\\t"; break; + default: + { + if(static_cast(c) <= 0x1f) + { + //escape control characters + char buffer[8]; + snprintf(&buffer[0], sizeof(buffer), "\\u%04x", c); + json_str += &buffer[0]; + break; + } + + //the ECMA 404 standard for json makes no mention about LS and PS characters, but they are known + // to be a problem with some systems that use ECMA 262 versions prior to 10 (released in the year 2019) + // so we escape these two code points just to be safe on all systems + if(i + 3 < str.size()) + { + if(static_cast(c) == 0xe2) + { + //escape utf-8 line separator https://www.fileformat.info/info/unicode/char/2028/index.htm + if(static_cast(str[i + 1]) == 0x80 && static_cast(str[i + 2]) == 0xa8) + { + json_str += "\\u2028"; + i += 2; + break; + } + //escape utf-8 paragraph separator https://www.fileformat.info/info/unicode/char/2029/index.htm + else if(static_cast(str[i + 1]) == 0x80 && static_cast(str[i + 2]) == 0xa9) + { + json_str += "\\u2029"; + i += 2; + break; + } + } + } + + //wasn't a special character, just concatenate + json_str += c; + } + } + } + + json_str += '"'; +} + +//transform en to a json string +//en must be guaranteed to not be nullptr +//if sort_keys is true, it will sort all of the assoc keys +//returns true if it was able to create a json correctly, false if there was problematic data +bool EvaluableNodeToJsonStringRecurse(EvaluableNode *en, std::string &json_str, bool sort_keys) +{ + if(en->IsAssociativeArray()) + { + json_str += '{'; + + auto &mcn = en->GetMappedChildNodesReference(); + + if(!sort_keys) + { + bool first_cn = true; + for(auto &[cn_id, cn] : mcn) + { + if(!first_cn) + json_str += ','; + else + first_cn = false; + + const auto &str = string_intern_pool.GetStringFromID(cn_id); + EscapeAndAppendStringToJsonString(str, json_str); + + json_str += ':'; + + 
if(cn == nullptr) + json_str += "null"; + else + { + if(!EvaluableNodeToJsonStringRecurse(cn, json_str, sort_keys)) + return false; + } + } + } + else //sort_keys + { + std::vector key_sids; + key_sids.reserve(mcn.size()); + for(auto &[key, _] : mcn) + key_sids.push_back(key); + + std::sort(begin(key_sids), end(key_sids), StringIDNaturalCompareSort); + + for(size_t i = 0; i < key_sids.size(); i++) + { + auto k = mcn.find(key_sids[i]); + + if(i > 0) + json_str += ','; + + const auto &str = string_intern_pool.GetStringFromID(key_sids[i]); + EscapeAndAppendStringToJsonString(str, json_str); + + json_str += ':'; + + if(k->second == nullptr) + json_str += "null"; + else + { + if(!EvaluableNodeToJsonStringRecurse(k->second, json_str, sort_keys)) + return false; + } + } + } + + json_str += '}'; + } + else if(!en->IsImmediate()) + { + auto node_type = en->GetType(); + if(node_type == ENT_NULL) + { + json_str += "null"; + return true; + } + else if(node_type == ENT_TRUE) + { + json_str += "true"; + return true; + } + else if(node_type == ENT_FALSE) + { + json_str += "false"; + return true; + } + else if(node_type != ENT_LIST) + { + //must be a list, so return false as can't build + return false; + } + + json_str += '['; + + bool first_cn = true; + for(auto &cn : en->GetOrderedChildNodesReference()) + { + if(!first_cn) + json_str += ','; + else + first_cn = false; + + if(cn == nullptr) + { + json_str += "null"; + } + else + { + if(!EvaluableNodeToJsonStringRecurse(cn, json_str, sort_keys)) + return false; + } + } + + json_str += ']'; + } + else //immediate + { + if(DoesEvaluableNodeTypeUseNumberData(en->GetType())) + { + double number = en->GetNumberValue(); + + if(number == std::numeric_limits::infinity()) + json_str += StringManipulation::NumberToString(std::numeric_limits::max()); + else if(number == -std::numeric_limits::infinity()) + json_str += StringManipulation::NumberToString(std::numeric_limits::lowest()); + else if(FastIsNaN(number)) + return false; + else + json_str += StringManipulation::NumberToString(number); + } + else + { + const auto &str_value = en->GetStringValue(); + EscapeAndAppendStringToJsonString(str_value, json_str); + } + } + + return true; +} + +EvaluableNode *EvaluableNodeJSONTranslation::JsonToEvaluableNode(EvaluableNodeManager *enm, std::string_view json_str) +{ + auto json_padded = simdjson::padded_string(json_str); + auto json_top_element = json_parser.iterate(json_padded); + + try + { + return JsonToEvaluableNodeRecurse(enm, json_top_element); + } + catch(simdjson::simdjson_error &e) + { + //get rid of unused variable warning + (void)e; + return nullptr; + } +} + +std::string EvaluableNodeJSONTranslation::EvaluableNodeToJson(EvaluableNode *code, bool sort_keys) +{ + if(code == nullptr) + return "null"; + + //if need cycle check, double-check + if(!EvaluableNode::CanNodeTreeBeFlattened(code)) + return ""; + + //if successful return the json, otherwise return blank + std::string json_str; + if(EvaluableNodeToJsonStringRecurse(code, json_str, sort_keys)) + return json_str; + else + return ""; +} + +EvaluableNode *EvaluableNodeJSONTranslation::Load(const std::string &resource_path, EvaluableNodeManager *enm) +{ + std::string error_string; + if(!Platform_IsResourcePathAccessible(resource_path, true, error_string)) + { + std::cerr << "Error loading JSON: " << error_string << std::endl; + return nullptr; + } + + auto json_str = simdjson::padded_string::load(resource_path); + auto json_top_element = json_parser.iterate(json_str); + + try + { + return 
JsonToEvaluableNodeRecurse(enm, json_top_element); + } + catch(simdjson::simdjson_error &e) + { + //get rid of unused variable warning + (void)e; + std::cerr << "Error loading JSON, malformatted file " << resource_path << std::endl; + return nullptr; + } +} + +// Save node tree to disk as JSON. +bool EvaluableNodeJSONTranslation::Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm, bool sort_keys) +{ + std::string error_string; + if(!Platform_IsResourcePathAccessible(resource_path, false, error_string)) + { + std::cerr << "Error storing JSON: " << error_string << std::endl; + return false; + } + + std::ofstream file(resource_path); + file << EvaluableNodeToJson(code, sort_keys); + + return true; +} diff --git a/src/Amalgam/importexport/FileSupportJSON.h b/src/Amalgam/importexport/FileSupportJSON.h new file mode 100644 index 00000000..25d05da3 --- /dev/null +++ b/src/Amalgam/importexport/FileSupportJSON.h @@ -0,0 +1,24 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" + +//system headers: +#include + +namespace EvaluableNodeJSONTranslation +{ + //converts JSON string_view to EvaluableNode tree + EvaluableNode *JsonToEvaluableNode(EvaluableNodeManager *enm, std::string_view json_str); + + //converts EvaluableNode tree to JSON string + // if sort_keys is true, it will sort all of the assoc keys + std::string EvaluableNodeToJson(EvaluableNode *code, bool sort_keys = false); + + //loads json file to EvaluableNode tree + EvaluableNode *Load(const std::string &resource_path, EvaluableNodeManager *enm); + + //stores EvaluableNode tree to json file + bool Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm, bool sort_keys); +}; diff --git a/src/Amalgam/importexport/FileSupportYAML.cpp b/src/Amalgam/importexport/FileSupportYAML.cpp new file mode 100644 index 00000000..430236ec --- /dev/null +++ b/src/Amalgam/importexport/FileSupportYAML.cpp @@ -0,0 +1,207 @@ +//project headers: +#include "FileSupportYAML.h" + +#include "EvaluableNodeTreeFunctions.h" +#include "FastMath.h" +#include "PlatformSpecific.h" +#include "StringManipulation.h" + +//3rd party headers: +#define RYML_SINGLE_HDR_DEFINE_NOW +#include "rapidyaml/rapidyaml-0.5.0.hpp" + +//system headers: +#include +#include + +//transform yaml to an Amalgam node tree. Only lists and assocs, and immediates are supported. 
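+//as a minimal illustration of the mapping performed below (the input is hypothetical; names refer to the ENT_* node types):
+//  "x: [1, two]"  ->  an ENT_ASSOC whose key "x" maps to an ENT_LIST containing a number node (1)
+//  and an ENT_STRING ("two"); null scalars become nullptr children, scalars that look like numbers
+//  become number nodes, and all other scalars become ENT_STRING nodes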
+EvaluableNode *YamlToEvaluableNodeRecurse(EvaluableNodeManager *enm, ryml::ConstNodeRef &element) +{ + if(element.is_seq()) + { + EvaluableNode *node = enm->AllocNode(ENT_LIST); + for(auto e : element.children()) + node->AppendOrderedChildNode(YamlToEvaluableNodeRecurse(enm, e)); + + return node; + } + + if(element.is_map()) + { + EvaluableNode *node = enm->AllocNode(ENT_ASSOC); + for(auto e : element.children()) + { + auto key_value = e.key(); + std::string key(key_value.begin(), key_value.end()); + node->SetMappedChildNode(key, YamlToEvaluableNodeRecurse(enm, e)); + } + + return node; + } + + if(element.val_is_null()) + return nullptr; + + auto value = element.val(); + std::string value_string(value.begin(), value.end()); + + if(value.is_number()) + { + auto [num, success] = Platform_StringToNumber(value_string); + if(!success) + return nullptr; + + return enm->AllocNode(num); + } + + //must be a string + return enm->AllocNode(ENT_STRING, value_string); +} + +//transform en to a rapidyaml tree +//en must be guaranteed to not be nullptr +//if sort_keys is true, it will sort all of the assoc keys +//returns true if it was able to create a yaml correctly, false if there was problematic data +bool EvaluableNodeToYamlStringRecurse(EvaluableNode *en, ryml::NodeRef &built_element, bool sort_keys) +{ + if(en->IsAssociativeArray()) + { + built_element |= ryml::MAP; + auto &mcn = en->GetMappedChildNodesReference(); + if(!sort_keys) + { + for(auto &[cn_id, cn] : mcn) + { + const auto &str = string_intern_pool.GetStringFromID(cn_id); + auto new_element = built_element.append_child(); + new_element << ryml::key(str); + if(!EvaluableNodeToYamlStringRecurse(cn, new_element, sort_keys)) + return false; + } + } + else //sort_keys + { + std::vector key_sids; + key_sids.reserve(mcn.size()); + for(auto &[key, _] : mcn) + key_sids.push_back(key); + + std::sort(begin(key_sids), end(key_sids), StringIDNaturalCompareSort); + + for(size_t i = 0; i < key_sids.size(); i++) + { + auto k = mcn.find(key_sids[i]); + + const auto &str = string_intern_pool.GetStringFromID(k->first); + auto new_element = built_element.append_child(); + new_element << ryml::key(str); + + if(!EvaluableNodeToYamlStringRecurse(k->second, new_element, sort_keys)) + return false; + } + } + } + else if(!en->IsImmediate()) + { + auto node_type = en->GetType(); + if(node_type == ENT_NULL) + { + //don't set anything + return true; + } + else if(node_type == ENT_TRUE) + { + built_element << "true"; + return true; + } + else if(node_type == ENT_FALSE) + { + built_element << "false"; + return true; + } + else if(node_type != ENT_LIST) + { + //must be a list, so return false as can't build + return false; + } + + built_element |= ryml::SEQ; + for(auto &cn : en->GetOrderedChildNodesReference()) + { + auto new_element = built_element.append_child(); + EvaluableNodeToYamlStringRecurse(cn, new_element, sort_keys); + } + } + else //immediate + { + if(DoesEvaluableNodeTypeUseNumberData(en->GetType())) + { + double number = en->GetNumberValue(); + built_element << number; + } + else + { + auto &str_value = en->GetStringValue(); + built_element << str_value; + } + } + + return true; +} + +EvaluableNode *EvaluableNodeYAMLTranslation::YamlToEvaluableNode(EvaluableNodeManager *enm, std::string &yaml_str) +{ + ryml::Tree tree = ryml::parse_in_arena(ryml::to_csubstr(yaml_str)); + + ryml::ConstNodeRef yaml_top_element = tree.rootref(); + + return YamlToEvaluableNodeRecurse(enm, yaml_top_element); +} + +std::string 
EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(EvaluableNode *code, bool sort_keys) +{ + if(code == nullptr) + return "null"; + + //if need cycle check, double-check + if(!EvaluableNode::CanNodeTreeBeFlattened(code)) + return ""; + + ryml::Tree tree; + auto top_node = tree.rootref(); + if(EvaluableNodeToYamlStringRecurse(code, top_node, sort_keys)) + return ryml::emitrs_yaml(tree); + else + return ""; +} + +EvaluableNode *EvaluableNodeYAMLTranslation::Load(const std::string &resource_path, EvaluableNodeManager *enm) +{ + auto [data, data_success] = Platform_OpenFileAsString(resource_path); + if(!data_success) + { + std::cerr << data << std::endl; + return EvaluableNodeReference::Null(); + } + + ryml::Tree tree = ryml::parse_in_arena(ryml::to_csubstr(data)); + + ryml::ConstNodeRef yaml_top_element = tree.rootref(); + + return YamlToEvaluableNodeRecurse(enm, yaml_top_element); +} + +bool EvaluableNodeYAMLTranslation::Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm, bool sort_keys) +{ + std::string error_string; + if(!Platform_IsResourcePathAccessible(resource_path, false, error_string)) + { + std::cerr << "Error storing YAML: " << error_string << std::endl; + return false; + } + + std::ofstream file(resource_path); + file << EvaluableNodeToYaml(code, sort_keys); + + return true; +} diff --git a/src/Amalgam/importexport/FileSupportYAML.h b/src/Amalgam/importexport/FileSupportYAML.h new file mode 100644 index 00000000..cd1b2261 --- /dev/null +++ b/src/Amalgam/importexport/FileSupportYAML.h @@ -0,0 +1,24 @@ +#pragma once + +//project headers: +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" + +//system headers: +#include + +namespace EvaluableNodeYAMLTranslation +{ + //converts YAML string_view to EvaluableNode tree + EvaluableNode *YamlToEvaluableNode(EvaluableNodeManager *enm, std::string &yaml_str); + + //converts EvaluableNode tree to YAML string + // if sort_keys is true, it will sort all of the assoc keys + std::string EvaluableNodeToYaml(EvaluableNode *code, bool sort_keys = false); + + //loads yaml file to EvaluableNode tree + EvaluableNode *Load(const std::string &resource_path, EvaluableNodeManager *enm); + + //stores EvaluableNode tree to yaml file + bool Store(EvaluableNode *code, const std::string &resource_path, EvaluableNodeManager *enm, bool sort_keys); +}; diff --git a/src/Amalgam/interpreter/Interpreter.cpp b/src/Amalgam/interpreter/Interpreter.cpp new file mode 100644 index 00000000..c0532b50 --- /dev/null +++ b/src/Amalgam/interpreter/Interpreter.cpp @@ -0,0 +1,883 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryBuilder.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "PerformanceProfiler.h" +#include "StringInternPool.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +std::array Interpreter::_opcodes = { + + //built-in / system specific + &Interpreter::InterpretNode_ENT_SYSTEM, // ENT_SYSTEM + &Interpreter::InterpretNode_ENT_GET_DEFAULTS, // ENT_GET_DEFAULTS + + //parsing + &Interpreter::InterpretNode_ENT_PARSE, // ENT_PARSE + &Interpreter::InterpretNode_ENT_UNPARSE, // ENT_UNPARSE + + //core control + &Interpreter::InterpretNode_ENT_IF, // ENT_IF + &Interpreter::InterpretNode_ENT_SEQUENCE, // ENT_SEQUENCE + 
&Interpreter::InterpretNode_ENT_PARALLEL, // ENT_PARALLEL + &Interpreter::InterpretNode_ENT_LAMBDA, // ENT_LAMBDA + &Interpreter::InterpretNode_ENT_CONCLUDE, // ENT_CONCLUDE + &Interpreter::InterpretNode_ENT_CALL, // ENT_CALL + &Interpreter::InterpretNode_ENT_CALL_SANDBOXED, // ENT_CALL_SANDBOXED + &Interpreter::InterpretNode_ENT_WHILE, // ENT_WHILE + + //definitions + &Interpreter::InterpretNode_ENT_LET, // ENT_LET + &Interpreter::InterpretNode_ENT_DECLARE, // ENT_DECLARE + &Interpreter::InterpretNode_ENT_ASSIGN_and_ACCUM, // ENT_ASSIGN + &Interpreter::InterpretNode_ENT_ASSIGN_and_ACCUM, // ENT_ACCUM + + //retrieval + &Interpreter::InterpretNode_ENT_RETRIEVE, // ENT_RETRIEVE + &Interpreter::InterpretNode_ENT_GET, // ENT_GET + &Interpreter::InterpretNode_ENT_SET_and_REPLACE, // ENT_SET + &Interpreter::InterpretNode_ENT_SET_and_REPLACE, // ENT_REPLACE + + //stack and node manipulation + &Interpreter::InterpretNode_ENT_TARGET, // ENT_TARGET + &Interpreter::InterpretNode_ENT_TARGET_INDEX, // ENT_TARGET_INDEX + &Interpreter::InterpretNode_ENT_TARGET_VALUE, // ENT_TARGET_VALUE + &Interpreter::InterpretNode_ENT_STACK, // ENT_STACK + &Interpreter::InterpretNode_ENT_ARGS, // ENT_ARGS + + //simulation and operations + &Interpreter::InterpretNode_ENT_RAND, // ENT_RAND + &Interpreter::InterpretNode_ENT_WEIGHTED_RAND, // ENT_WEIGHTED_RAND + &Interpreter::InterpretNode_ENT_GET_RAND_SEED, // ENT_GET_RAND_SEED + &Interpreter::InterpretNode_ENT_SET_RAND_SEED, // ENT_SET_RAND_SEED + &Interpreter::InterpretNode_ENT_SYSTEM_TIME, // ENT_SYSTEM_TIME + + //base math + &Interpreter::InterpretNode_ENT_ADD, // ENT_ADD + &Interpreter::InterpretNode_ENT_SUBTRACT, // ENT_SUBTRACT + &Interpreter::InterpretNode_ENT_MULTIPLY, // ENT_MULTIPLY + &Interpreter::InterpretNode_ENT_DIVIDE, // ENT_DIVIDE + &Interpreter::InterpretNode_ENT_MODULUS, // ENT_MODULUS + &Interpreter::InterpretNode_ENT_GET_DIGITS, // ENT_GET_DIGITS + &Interpreter::InterpretNode_ENT_SET_DIGITS, // ENT_SET_DIGITS + &Interpreter::InterpretNode_ENT_FLOOR, // ENT_FLOOR + &Interpreter::InterpretNode_ENT_CEILING, // ENT_CEILING + &Interpreter::InterpretNode_ENT_ROUND, // ENT_ROUND + + //extended math + &Interpreter::InterpretNode_ENT_EXPONENT, // ENT_EXPONENT + &Interpreter::InterpretNode_ENT_LOG, // ENT_LOG + &Interpreter::InterpretNode_ENT_SIN, // ENT_SIN + &Interpreter::InterpretNode_ENT_ASIN, // ENT_ASIN + &Interpreter::InterpretNode_ENT_COS, // ENT_COS + &Interpreter::InterpretNode_ENT_ACOS, // ENT_ACOS + &Interpreter::InterpretNode_ENT_TAN, // ENT_TAN + &Interpreter::InterpretNode_ENT_ATAN, // ENT_ATAN + &Interpreter::InterpretNode_ENT_SINH, // ENT_SINH + &Interpreter::InterpretNode_ENT_ASINH, // ENT_ASINH + &Interpreter::InterpretNode_ENT_COSH, // ENT_COSH + &Interpreter::InterpretNode_ENT_ACOSH, // ENT_ACOSH + &Interpreter::InterpretNode_ENT_TANH, // ENT_TANH + &Interpreter::InterpretNode_ENT_ATANH, // ENT_ATANH + &Interpreter::InterpretNode_ENT_ERF, // ENT_ERF + &Interpreter::InterpretNode_ENT_TGAMMA, // ENT_TGAMMA + &Interpreter::InterpretNode_ENT_LGAMMA, // ENT_LGAMMA + &Interpreter::InterpretNode_ENT_SQRT, // ENT_SQRT + &Interpreter::InterpretNode_ENT_POW, // ENT_POW + &Interpreter::InterpretNode_ENT_ABS, // ENT_ABS + &Interpreter::InterpretNode_ENT_MAX, // ENT_MAX + &Interpreter::InterpretNode_ENT_MIN, // ENT_MIN + &Interpreter::InterpretNode_ENT_DOT_PRODUCT, // ENT_DOT_PRODUCT + &Interpreter::InterpretNode_ENT_GENERALIZED_DISTANCE, // ENT_GENERALIZED_DISTANCE + &Interpreter::InterpretNode_ENT_ENTROPY, // ENT_ENTROPY + + //list manipulation + 
&Interpreter::InterpretNode_ENT_FIRST, // ENT_FIRST + &Interpreter::InterpretNode_ENT_TAIL, // ENT_TAIL + &Interpreter::InterpretNode_ENT_LAST, // ENT_LAST + &Interpreter::InterpretNode_ENT_TRUNC, // ENT_TRUNC + &Interpreter::InterpretNode_ENT_APPEND, // ENT_APPEND + &Interpreter::InterpretNode_ENT_SIZE, // ENT_SIZE + &Interpreter::InterpretNode_ENT_RANGE, // ENT_RANGE + + //transformation + &Interpreter::InterpretNode_ENT_REWRITE, // ENT_REWRITE + &Interpreter::InterpretNode_ENT_MAP, // ENT_MAP + &Interpreter::InterpretNode_ENT_FILTER, // ENT_FILTER + &Interpreter::InterpretNode_ENT_WEAVE, // ENT_WEAVE + &Interpreter::InterpretNode_ENT_REDUCE, // ENT_REDUCE + &Interpreter::InterpretNode_ENT_APPLY, // ENT_APPLY + &Interpreter::InterpretNode_ENT_REVERSE, // ENT_REVERSE + &Interpreter::InterpretNode_ENT_SORT, // ENT_SORT + + //associative list manipulation + &Interpreter::InterpretNode_ENT_INDICES, // ENT_INDICES + &Interpreter::InterpretNode_ENT_VALUES, // ENT_VALUES + &Interpreter::InterpretNode_ENT_CONTAINS_INDEX, // ENT_CONTAINS_INDEX + &Interpreter::InterpretNode_ENT_CONTAINS_VALUE, // ENT_CONTAINS_VALUE + &Interpreter::InterpretNode_ENT_REMOVE, // ENT_REMOVE + &Interpreter::InterpretNode_ENT_KEEP, // ENT_KEEP + &Interpreter::InterpretNode_ENT_ASSOCIATE, // ENT_ASSOCIATE + &Interpreter::InterpretNode_ENT_ZIP, // ENT_ZIP + &Interpreter::InterpretNode_ENT_UNZIP, // ENT_UNZIP + + //logic + &Interpreter::InterpretNode_ENT_AND, // ENT_AND + &Interpreter::InterpretNode_ENT_OR, // ENT_OR + &Interpreter::InterpretNode_ENT_XOR, // ENT_XOR + &Interpreter::InterpretNode_ENT_NOT, // ENT_NOT + + //equivalence + &Interpreter::InterpretNode_ENT_EQUAL, // ENT_EQUAL + &Interpreter::InterpretNode_ENT_NEQUAL, // ENT_NEQUAL + &Interpreter::InterpretNode_ENT_LESS_and_LEQUAL, // ENT_LESS + &Interpreter::InterpretNode_ENT_LESS_and_LEQUAL, // ENT_LEQUAL + &Interpreter::InterpretNode_ENT_GREATER_and_GEQUAL, // ENT_GREATER + &Interpreter::InterpretNode_ENT_GREATER_and_GEQUAL, // ENT_GEQUAL + &Interpreter::InterpretNode_ENT_TYPE_EQUALS, // ENT_TYPE_EQUALS + &Interpreter::InterpretNode_ENT_TYPE_NEQUALS, // ENT_TYPE_NEQUALS + + //built-in constants and variables + &Interpreter::InterpretNode_ENT_TRUE, // ENT_TRUE + &Interpreter::InterpretNode_ENT_FALSE, // ENT_FALSE + &Interpreter::InterpretNode_ENT_NULL, // ENT_NULL + + //data types + &Interpreter::InterpretNode_ENT_LIST, // ENT_LIST + &Interpreter::InterpretNode_ENT_ASSOC, // ENT_ASSOC + &Interpreter::InterpretNode_ENT_NUMBER, // ENT_NUMBER + &Interpreter::InterpretNode_ENT_STRING, // ENT_STRING + &Interpreter::InterpretNode_ENT_SYMBOL, // ENT_SYMBOL + + //node types + &Interpreter::InterpretNode_ENT_GET_TYPE, // ENT_GET_TYPE + &Interpreter::InterpretNode_ENT_GET_TYPE_STRING, // ENT_GET_TYPE_STRING + &Interpreter::InterpretNode_ENT_SET_TYPE, // ENT_SET_TYPE + &Interpreter::InterpretNode_ENT_FORMAT, // ENT_FORMAT + + //EvaluableNode management: labels, comments, and concurrency + &Interpreter::InterpretNode_ENT_GET_LABELS, // ENT_GET_LABELS + &Interpreter::InterpretNode_ENT_GET_ALL_LABELS, // ENT_GET_ALL_LABELS + &Interpreter::InterpretNode_ENT_SET_LABELS, // ENT_SET_LABELS + &Interpreter::InterpretNode_ENT_ZIP_LABELS, // ENT_ZIP_LABELS + &Interpreter::InterpretNode_ENT_GET_COMMENTS, // ENT_GET_COMMENTS + &Interpreter::InterpretNode_ENT_SET_COMMENTS, // ENT_SET_COMMENTS + &Interpreter::InterpretNode_ENT_GET_CONCURRENCY, // ENT_GET_CONCURRENCY + &Interpreter::InterpretNode_ENT_SET_CONCURRENCY, // ENT_SET_CONCURRENCY + &Interpreter::InterpretNode_ENT_GET_VALUE, // 
ENT_GET_VALUE + &Interpreter::InterpretNode_ENT_SET_VALUE, // ENT_SET_VALUE + + //string + &Interpreter::InterpretNode_ENT_EXPLODE, // ENT_EXPLODE + &Interpreter::InterpretNode_ENT_SPLIT, // ENT_SPLIT + &Interpreter::InterpretNode_ENT_SUBSTR, // ENT_SUBSTR + &Interpreter::InterpretNode_ENT_CONCAT, // ENT_CONCAT + + //encryption + &Interpreter::InterpretNode_ENT_CRYPTO_SIGN, // ENT_CRYPTO_SIGN + &Interpreter::InterpretNode_ENT_CRYPTO_SIGN_VERIFY, // ENT_CRYPTO_SIGN_VERIFY + &Interpreter::InterpretNode_ENT_ENCRYPT, // ENT_ENCRYPT + &Interpreter::InterpretNode_ENT_DECRYPT, // ENT_DECRYPT + + //I/O + &Interpreter::InterpretNode_ENT_PRINT, // ENT_PRINT + + //tree merging + &Interpreter::InterpretNode_ENT_TOTAL_SIZE, // ENT_TOTAL_SIZE + &Interpreter::InterpretNode_ENT_MUTATE, // ENT_MUTATE + &Interpreter::InterpretNode_ENT_COMMONALITY, // ENT_COMMONALITY + &Interpreter::InterpretNode_ENT_EDIT_DISTANCE, // ENT_EDIT_DISTANCE + &Interpreter::InterpretNode_ENT_INTERSECT, // ENT_INTERSECT + &Interpreter::InterpretNode_ENT_UNION, // ENT_UNION + &Interpreter::InterpretNode_ENT_DIFFERENCE, // ENT_DIFFERENCE + &Interpreter::InterpretNode_ENT_MIX, // ENT_MIX + &Interpreter::InterpretNode_ENT_MIX_LABELS, // ENT_MIX_LABELS + + //entity merging + &Interpreter::InterpretNode_ENT_TOTAL_ENTITY_SIZE, // ENT_TOTAL_ENTITY_SIZE + &Interpreter::InterpretNode_ENT_FLATTEN_ENTITY, // ENT_FLATTEN_ENTITY + &Interpreter::InterpretNode_ENT_MUTATE_ENTITY, // ENT_MUTATE_ENTITY + &Interpreter::InterpretNode_ENT_COMMONALITY_ENTITIES, // ENT_COMMONALITY_ENTITIES + &Interpreter::InterpretNode_ENT_EDIT_DISTANCE_ENTITIES, // ENT_EDIT_DISTANCE_ENTITIES + &Interpreter::InterpretNode_ENT_INTERSECT_ENTITIES, // ENT_INTERSECT_ENTITIES + &Interpreter::InterpretNode_ENT_UNION_ENTITIES, // ENT_UNION_ENTITIES + &Interpreter::InterpretNode_ENT_DIFFERENCE_ENTITIES, // ENT_DIFFERENCE_ENTITIES + &Interpreter::InterpretNode_ENT_MIX_ENTITIES, // ENT_MIX_ENTITIES + + //entity details + &Interpreter::InterpretNode_ENT_GET_ENTITY_COMMENTS, // ENT_GET_ENTITY_COMMENTS + &Interpreter::InterpretNode_ENT_RETRIEVE_ENTITY_ROOT, // ENT_RETRIEVE_ENTITY_ROOT + &Interpreter::InterpretNode_ENT_ASSIGN_ENTITY_ROOTS_and_ACCUM_ENTITY_ROOTS, // ENT_ASSIGN_ENTITY_ROOTS + &Interpreter::InterpretNode_ENT_ASSIGN_ENTITY_ROOTS_and_ACCUM_ENTITY_ROOTS, // ENT_ACCUM_ENTITY_ROOTS + &Interpreter::InterpretNode_ENT_GET_ENTITY_RAND_SEED, // ENT_GET_ENTITY_RAND_SEED + &Interpreter::InterpretNode_ENT_SET_ENTITY_RAND_SEED, // ENT_SET_ENTITY_RAND_SEED + &Interpreter::InterpretNode_ENT_GET_ENTITY_ROOT_PERMISSION, // ENT_GET_ENTITY_ROOT_PERMISSION + &Interpreter::InterpretNode_ENT_SET_ENTITY_ROOT_PERMISSION, // ENT_SET_ENTITY_ROOT_PERMISSION + + //entity base actions + &Interpreter::InterpretNode_ENT_CREATE_ENTITIES, // ENT_CREATE_ENTITIES + &Interpreter::InterpretNode_ENT_CLONE_ENTITIES, // ENT_CLONE_ENTITIES + &Interpreter::InterpretNode_ENT_MOVE_ENTITIES, // ENT_MOVE_ENTITIES + &Interpreter::InterpretNode_ENT_DESTROY_ENTITIES, // ENT_DESTROY_ENTITIES + &Interpreter::InterpretNode_ENT_LOAD, // ENT_LOAD + &Interpreter::InterpretNode_ENT_LOAD_ENTITY_and_LOAD_PERSISTENT_ENTITY, // ENT_LOAD_ENTITY + &Interpreter::InterpretNode_ENT_LOAD_ENTITY_and_LOAD_PERSISTENT_ENTITY, // ENT_LOAD_PERSIST + &Interpreter::InterpretNode_ENT_STORE, // ENT_STORE + &Interpreter::InterpretNode_ENT_STORE_ENTITY, // ENT_STORE_ENTITY + &Interpreter::InterpretNode_ENT_CONTAINS_ENTITY, // ENT_CONTAINS_ENTITY + + //entity query + 
&Interpreter::InterpretNode_ENT_CONTAINED_ENTITIES_and_COMPUTE_ON_CONTAINED_ENTITIES, // ENT_CONTAINED_ENTITIES + &Interpreter::InterpretNode_ENT_CONTAINED_ENTITIES_and_COMPUTE_ON_CONTAINED_ENTITIES, // ENT_COMPUTE_ON_CONTAINED_ENTITIES + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_SELECT + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_SAMPLE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_WEIGHTED_SAMPLE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_IN_ENTITY_LIST + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NOT_IN_ENTITY_LIST + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_COUNT + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_EXISTS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NOT_EXISTS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_EQUALS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NOT_EQUALS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_BETWEEN + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NOT_BETWEEN + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_AMONG + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NOT_AMONG + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_MAX + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_MIN + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_SUM + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_MODE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_QUANTILE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_GENERALIZED_MEAN + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_MIN_DIFFERENCE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_MAX_DIFFERENCE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_VALUE_MASSES + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_GREATER_OR_EQUAL_TO + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_LESS_OR_EQUAL_TO + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_WITHIN_GENERALIZED_DISTANCE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_QUERY_NEAREST_GENERALIZED_DISTANCE + + //aggregate analysis query Functions + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_COMPUTE_ENTITY_CONVICTIONS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS + &Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes, // ENT_COMPUTE_ENTITY_KL_DIVERGENCES + + //entity access + &Interpreter::InterpretNode_ENT_CONTAINS_LABEL, // ENT_CONTAINS_LABEL + &Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIRECT_ASSIGN_TO_ENTITIES_and_ACCUM_TO_ENTITIES, // ENT_ASSIGN_TO_ENTITIES + &Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIRECT_ASSIGN_TO_ENTITIES_and_ACCUM_TO_ENTITIES, // ENT_DIRECT_ASSIGN_TO_ENTITIES + &Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIRECT_ASSIGN_TO_ENTITIES_and_ACCUM_TO_ENTITIES, // ENT_ACCUM_TO_ENTITIES + 
&Interpreter::InterpretNode_ENT_RETRIEVE_FROM_ENTITY_and_DIRECT_RETRIEVE_FROM_ENTITY, // ENT_RETRIEVE_FROM_ENTITY + &Interpreter::InterpretNode_ENT_RETRIEVE_FROM_ENTITY_and_DIRECT_RETRIEVE_FROM_ENTITY, // ENT_DIRECT_RETRIEVE_FROM_ENTITY + &Interpreter::InterpretNode_ENT_CALL_ENTITY_and_CALL_ENTITY_GET_CHANGES, // ENT_CALL_ENTITY + &Interpreter::InterpretNode_ENT_CALL_ENTITY_and_CALL_ENTITY_GET_CHANGES, // ENT_CALL_ENTITY_GET_CHANGES + &Interpreter::InterpretNode_ENT_CALL_CONTAINER, // ENT_CALL_CONTAINER + + //not in active memory + &Interpreter::InterpretNode_ENT_DEALLOCATED, // ENT_DEALLOCATED + &Interpreter::InterpretNode_ENT_DEALLOCATED, // ENT_UNINITIALIZED + + //something went wrong - maximum value + &Interpreter::InterpretNode_ENT_NOT_A_BUILT_IN_TYPE, // ENT_NOT_A_BUILT_IN_TYPE +}; + + +Interpreter::Interpreter(EvaluableNodeManager *enm, + ExecutionCycleCount max_num_steps, size_t max_num_nodes, RandomStream rand_stream, + std::vector *write_listeners, PrintListener *print_listener, + Entity *t, Interpreter *calling_interpreter) +{ + curExecutionStep = 0; + maxNumExecutionSteps = max_num_steps; + + //account for what is already in use + curNumExecutionNodes = enm->GetNumberOfUsedNodes(); + curNumExecutionNodesAllocatedToEntities = 0; + if(max_num_nodes == 0) + maxNumExecutionNodes = 0; + else + maxNumExecutionNodes = max_num_nodes + enm->GetNumberOfUsedNodes(); + + randomStream = rand_stream; + curEntity = t; + callingInterpreter = calling_interpreter; + writeListeners = write_listeners; + printListener = print_listener; + + callStackNodes = nullptr; + interpreterNodeStackNodes = nullptr; + constructionStackNodes = nullptr; + + evaluableNodeManager = enm; +} + +#ifdef MULTITHREAD_SUPPORT +EvaluableNodeReference Interpreter::ExecuteNode(EvaluableNode *en, + EvaluableNode *call_stack, EvaluableNode *interpreter_node_stack, + EvaluableNode *construction_stack, std::vector *construction_stack_indices, + Concurrency::SingleMutex *call_stack_write_mutex) +#else +EvaluableNodeReference Interpreter::ExecuteNode(EvaluableNode *en, + EvaluableNode *call_stack, EvaluableNode *interpreter_node_stack, + EvaluableNode *construction_stack, std::vector *construction_stack_indices) +#endif +{ + +#ifdef MULTITHREAD_SUPPORT + if(call_stack == nullptr) + callStackSharedAccessStartingDepth = 0; + else + callStackSharedAccessStartingDepth = call_stack->GetOrderedChildNodes().size(); + + callStackWriteMutex = call_stack_write_mutex; +#endif + + //use specified or create new callStack + if(call_stack == nullptr) + { + //create list of associative lists, and populate it with the top of the stack + call_stack = evaluableNodeManager->AllocNode(ENT_LIST); + + EvaluableNode *new_context_entry = evaluableNodeManager->AllocNode(ENT_ASSOC); + call_stack->AppendOrderedChildNode(new_context_entry); + } + + if(interpreter_node_stack == nullptr) + interpreter_node_stack = evaluableNodeManager->AllocNode(ENT_LIST); + + if(construction_stack == nullptr) + construction_stack = evaluableNodeManager->AllocNode(ENT_LIST); + + callStackNodes = &call_stack->GetOrderedChildNodes(); + interpreterNodeStackNodes = &interpreter_node_stack->GetOrderedChildNodes(); + constructionStackNodes = &construction_stack->GetOrderedChildNodes(); + + if(construction_stack_indices != nullptr) + constructionStackIndices = *construction_stack_indices; + + //protect all of the stacks with needing cycle free checks + // in case a node is added to one which isn't cycle free + call_stack->SetNeedCycleCheck(true); + for(auto &cn : 
call_stack->GetOrderedChildNodesReference()) + cn->SetNeedCycleCheck(true); + interpreter_node_stack->SetNeedCycleCheck(true); + construction_stack->SetNeedCycleCheck(true); + + //keep these references as long as the interpreter is around + std::array nodes_to_keep = { call_stack, interpreter_node_stack, construction_stack }; + evaluableNodeManager->KeepNodeReferences(nodes_to_keep); + auto retval = InterpretNode(en); + evaluableNodeManager->FreeNodeReferences(nodes_to_keep); + + //remove these nodes + evaluableNodeManager->FreeNode(interpreter_node_stack); + evaluableNodeManager->FreeNode(construction_stack); + + return retval; +} + +Interpreter::~Interpreter() +{ + +} + +EvaluableNodeReference Interpreter::ConvertArgsToCallStack(EvaluableNodeReference &args, EvaluableNodeManager *enm) +{ + if(enm == nullptr) + return EvaluableNodeReference::Null(); + + //ensure have arguments + if(args == nullptr) + { + args.reference = enm->AllocNode(ENT_ASSOC); + args.unique = true; + } + else if(!args->IsAssociativeArray()) + { + enm->FreeNodeTreeIfPossible(args); + args.reference = enm->AllocNode(ENT_ASSOC); + args.unique = true; + } + else if(!args.unique) + { + args.reference = enm->AllocNode(args); + } + + EvaluableNode *call_stack = enm->AllocNode(ENT_LIST); + call_stack->AppendOrderedChildNode(args); + + return EvaluableNodeReference(call_stack, args.unique); +} + +EvaluableNode **Interpreter::GetExecutionContextSymbolLocation(const StringInternPool::StringID symbol_sid, size_t &call_stack_index) +{ + //find symbol by walking up the stack; each layer must be an assoc + for(call_stack_index = callStackNodes->size(); call_stack_index > 0; call_stack_index--) + { + EvaluableNode *cur_context = (*callStackNodes)[call_stack_index - 1]; + + //see if this level of the stack contains the symbol + auto &mcn = cur_context->GetMappedChildNodesReference(); + auto found = mcn.find(symbol_sid); + if(found != end(mcn)) + { + //subtract one here to match the subtraction above + call_stack_index--; + + return &found->second; + } + } + + //didn't find it anywhere, so default it to the current top of the stack + call_stack_index = callStackNodes->size() - 1; + return nullptr; +} + +EvaluableNode **Interpreter::GetOrCreateExecutionContextSymbolLocation(const StringInternPool::StringID symbol_sid, size_t &call_stack_index) +{ + //find appropriate context for symbol by walking up the stack + for(call_stack_index = callStackNodes->size(); call_stack_index > 0; call_stack_index--) + { + EvaluableNode *cur_context = (*callStackNodes)[call_stack_index - 1]; + + //see if this level of the stack contains the symbol + auto &mcn = cur_context->GetMappedChildNodesReference(); + auto found = mcn.find(symbol_sid); + if(found != end(mcn)) + { + //subtract one here to match the subtraction above + call_stack_index--; + + return &found->second; + } + } + + //didn't find it anywhere, so default it to the current top of the stack and create it + call_stack_index = callStackNodes->size() - 1; + EvaluableNode *context_to_use = (*callStackNodes)[call_stack_index]; + return context_to_use->GetOrCreateMappedChildNode(symbol_sid); +} + +EvaluableNodeReference Interpreter::InterpretNode(EvaluableNode *en) +{ + if(EvaluableNode::IsNull(en)) + return EvaluableNodeReference::Null(); + +#ifdef INTERPRETER_PROFILE_OPCODES + std::string opcode_str; + + //if debugging sources is enabled, then concatenate the opcode to the first line of the comment + if(asset_manager.debugSources) + { + if(en->HasComments()) + { + auto &comment = 
en->GetCommentsString(); + auto first_line_end = comment.find('\n'); + if(first_line_end == std::string::npos) + opcode_str = comment; + else //copy up until newline + { + opcode_str = comment.substr(0, first_line_end); + if(opcode_str.size() > 0 && opcode_str.back() == '\r') + opcode_str.pop_back(); + } + + opcode_str += ": "; + } + } + + opcode_str += GetStringFromEvaluableNodeType(en->GetType(), true); + performance_profiler.StartOperation(opcode_str, evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //make sure don't run for longer than allowed + if(!AllowUnlimitedExecutionSteps()) + { + curExecutionStep++; + if(curExecutionStep >= maxNumExecutionSteps) + { +#ifdef INTERPRETER_PROFILE_OPCODES + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + return EvaluableNodeReference::Null(); + } + } + + evaluableNodeManager->executionCyclesSinceLastGarbageCollection++; + + //reference this node before we collect garbage + //CreateInterpreterNodeStackStateSaver is a bit expensive for this frequently called function + //especially because only one node is kept + interpreterNodeStackNodes->push_back(en); + + //for deep debugging only + //ValidateEvaluableNodeIntegrity(); + + //perform garbage collection +#if defined(INTERPRETER_PROFILE_OPCODES) || defined(INTERPRETER_PROFILE_LABELS_CALLED) + const std::string collect_garbage_string = ".collect_garbage"; + if(evaluableNodeManager->RecommendGarbageCollection()) + { + performance_profiler.StartOperation(collect_garbage_string, evaluableNodeManager->GetNumberOfUsedNodes()); + CollectGarbage(); + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); + } +#else + CollectGarbage(); +#endif + + //for deep debugging only + //ValidateEvaluableNodeIntegrity(); + + //make sure don't eat more memory than allowed + if(!AllowUnlimitedExecutionNodes()) + { + UpdateCurNumExecutionNodes(); + if(curNumExecutionNodes >= maxNumExecutionNodes) + { +#ifdef INTERPRETER_PROFILE_OPCODES + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + return EvaluableNodeReference::Null(); + } + } + + //get corresponding opcode + EvaluableNodeType ent = en->GetType(); + auto oc = _opcodes[ent]; + + EvaluableNodeReference retval = (this->*oc)(en); + + //for deep debugging only + //ValidateEvaluableNodeIntegrity(); + +#ifdef INTERPRETER_PROFILE_OPCODES + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //finished with opcode + interpreterNodeStackNodes->pop_back(); + + return retval; +} + +EvaluableNode *Interpreter::GetCurrentExecutionContext() +{ + //this should not happen, but just in case + if(callStackNodes->size() < 1) + return nullptr; + + return callStackNodes->back(); +} + +std::pair Interpreter::InterpretNodeIntoStringValue(EvaluableNode *n) +{ + if(EvaluableNode::IsEmptyNode(n)) + return std::make_pair(false, ""); + + //shortcut if the node has what is being asked + if(n->GetType() == ENT_STRING) + return std::make_pair(true, n->GetStringValue()); + + auto result = InterpretNodeForImmediateUse(n); + std::string result_string = EvaluableNode::ToString(result); + evaluableNodeManager->FreeNodeTreeIfPossible(result); + + return std::make_pair(true, result_string); +} + +std::string Interpreter::InterpretNodeIntoStringValueEmptyNull(EvaluableNode *n) +{ + if(EvaluableNode::IsEmptyNode(n)) + return ""; + + //shortcut if the node has what is being asked + if(n->GetType() == ENT_STRING) + return n->GetStringValue(); + + auto 
result = InterpretNodeForImmediateUse(n); + + if(EvaluableNode::IsEmptyNode(result)) + return ""; + + std::string result_string = EvaluableNode::ToString(result); + evaluableNodeManager->FreeNodeTreeIfPossible(result); + + return result_string; +} + +StringInternPool::StringID Interpreter::InterpretNodeIntoStringIDValueIfExists(EvaluableNode *n) +{ + //shortcut if the node has what is being asked + if(n != nullptr && n->GetType() == ENT_STRING) + return n->GetStringID(); + + auto result = InterpretNodeForImmediateUse(n); + StringInternPool::StringID result_sid = EvaluableNode::ToStringIDIfExists(result); + evaluableNodeManager->FreeNodeTreeIfPossible(result); + + return result_sid; +} + +StringInternPool::StringID Interpreter::InterpretNodeIntoStringIDValueWithReference(EvaluableNode *n) +{ + //shortcut if the node has what is being asked + if(n != nullptr && n->GetType() == ENT_STRING) + return string_intern_pool.CreateStringReference(n->GetStringID()); + + auto result = InterpretNodeForImmediateUse(n); + + StringInternPool::StringID result_sid = string_intern_pool.NOT_A_STRING_ID; + //if have a unique string, then just grab the string's reference instead of creating a new one + if(result.unique) + { + if(result != nullptr && result->IsStringValue()) + result_sid = result->GetAndClearStringIDWithReference(); + else + result_sid = EvaluableNode::ToStringIDWithReference(result); + + evaluableNodeManager->FreeNodeTree(result); + } + else //not unique, so can't free + { + result_sid = EvaluableNode::ToStringIDWithReference(result); + } + + return result_sid; +} + +EvaluableNode *Interpreter::InterpretNodeIntoUniqueStringIDValueEvaluableNode(EvaluableNode *n) +{ + //if can skip InterpretNode, then just allocate the string + if(n == nullptr || n->GetIsIdempotent() + || n->GetType() == ENT_STRING || n->GetType() == ENT_NUMBER) + return evaluableNodeManager->AllocNodeWithReferenceHandoff(ENT_STRING, + EvaluableNode::ToStringIDWithReference(n)); + + auto result = InterpretNode(n); + + if(result == nullptr || !result.unique) + return evaluableNodeManager->AllocNodeWithReferenceHandoff(ENT_STRING, + EvaluableNode::ToStringIDWithReference(result)); + + result->ClearMetadata(); + + if(result->GetType() != ENT_STRING) + result->SetType(ENT_STRING, evaluableNodeManager); + + return result; +} + +double Interpreter::InterpretNodeIntoNumberValue(EvaluableNode *n) +{ + //shortcut if the node has what is being asked + if(n != nullptr && n->GetType() == ENT_NUMBER) + return n->GetNumberValue(); + + auto result = InterpretNodeForImmediateUse(n); + double result_value = EvaluableNode::ToNumber(result); + evaluableNodeManager->FreeNodeTreeIfPossible(result); + + return result_value; +} + +EvaluableNode *Interpreter::InterpretNodeIntoUniqueNumberValueEvaluableNode(EvaluableNode *n) +{ + if(n == nullptr || n->GetIsIdempotent()) + return evaluableNodeManager->AllocNode(EvaluableNode::ToNumber(n)); + + auto result = InterpretNode(n); + + if(result == nullptr || !result.unique) + return evaluableNodeManager->AllocNode(EvaluableNode::ToNumber(result)); + + result->ClearMetadata(); + + if(result->GetType() != ENT_NUMBER) + result->SetType(ENT_NUMBER, evaluableNodeManager); + + return result; +} + +bool Interpreter::InterpretNodeIntoBoolValue(EvaluableNode *n, bool value_if_null) +{ + //shortcut if the node has what is being asked + if(n == nullptr) + return value_if_null; + + auto result = InterpretNodeForImmediateUse(n); + bool result_value = value_if_null; + if(!EvaluableNode::IsNull(result)) + result_value = 
EvaluableNode::IsTrue(result); + + evaluableNodeManager->FreeNodeTreeIfPossible(result); + + return result_value; +} + +void Interpreter::InterpretNodeIntoDestinationEntity(EvaluableNode *n, Entity *&destination_entity_parent, StringInternRef &new_entity_id) +{ + EvaluableNodeReference new_entity_id_node = InterpretNodeForImmediateUse(n); + TraverseEntityToNewDestinationViaEvaluableNodeIDPath(curEntity, new_entity_id_node, destination_entity_parent, new_entity_id); + evaluableNodeManager->FreeNodeTreeIfPossible(new_entity_id_node); +} + +EvaluableNode **Interpreter::TraverseToDestinationFromTraversalPathList(EvaluableNode **source, EvaluableNodeReference &tpl, bool create_destination_if_necessary) +{ + EvaluableNode **address_list; + //default list length to 1 + size_t address_list_length = 1; + + //if it's an actual address list, then use it + if(tpl != nullptr && DoesEvaluableNodeTypeUseOrderedData(tpl->GetType())) + { + auto &ocn = tpl->GetOrderedChildNodes(); + address_list = ocn.data(); + address_list_length = ocn.size(); + } + else //it's only a single value; use default list length of 1 + { + address_list = &(tpl.reference); + } + + size_t max_num_nodes = 0; + if(!AllowUnlimitedExecutionNodes()) + max_num_nodes = (maxNumExecutionNodes - curNumExecutionNodes); + + EvaluableNode **destination = GetRelativeEvaluableNodeFromTraversalPathList(source, address_list, address_list_length, create_destination_if_necessary ? evaluableNodeManager : nullptr, max_num_nodes); + + return destination; +} + +Entity *Interpreter::InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(EvaluableNode *node_id_path_to_interpret) +{ + if(curEntity == nullptr) + return nullptr; + + if(EvaluableNode::IsEmptyNode(node_id_path_to_interpret)) + return curEntity; + + //only need to interpret if not idempotent + EvaluableNodeReference source_id_node = InterpretNodeForImmediateUse(node_id_path_to_interpret); + Entity *source_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, source_id_node); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + + return source_entity; +} + +EvaluableNode *Interpreter::RewriteByFunction(EvaluableNodeReference function, EvaluableNode *top_node, EvaluableNode *n, EvaluableNode::ReferenceSetType &references) +{ + if(function == nullptr || n == nullptr) + return nullptr; + + //try to record in references, but if already processed and exists, then return the existing value + if(references.insert(n).second == false) + return n; + + if(n->IsAssociativeArray()) + { + PushNewConstructionContext(top_node, nullptr, EvaluableNodeImmediateValueWithType(StringInternPool::NOT_A_STRING_ID), n); + + for(auto &[e_id, e] : n->GetMappedChildNodesReference()) + { + SetTopTargetValueIndexInConstructionStack(e_id); + SetTopTargetValueReferenceInConstructionStack(e); + e = RewriteByFunction(function, top_node, e, references); + } + + PopConstructionContext(); + } + else + { + auto &ocn = n->GetOrderedChildNodes(); + if(ocn.size() > 0) + { + PushNewConstructionContext(top_node, nullptr, EvaluableNodeImmediateValueWithType(0.0), n); + + //rewrite child nodes before rewriting this one + for(size_t i = 0; i < ocn.size(); i++) + { + SetTopTargetValueIndexInConstructionStack(static_cast(i)); + SetTopTargetValueReferenceInConstructionStack(ocn[i]); + ocn[i] = RewriteByFunction(function, top_node, ocn[i], references); + } + + PopConstructionContext(); + } + } + + EvaluableNodeReference result = InterpretNode(function); + //reuse the existing node since it has already 
been deepcopied + n->CopyValueFrom(result); + + return result; +} + +#ifdef MULTITHREAD_SUPPORT + +bool Interpreter::InterpretEvaluableNodesConcurrently(EvaluableNode *parent_node, std::vector &nodes, std::vector &interpreted_nodes) +{ + if(!parent_node->GetConcurrency()) + return false; + + size_t num_elements = nodes.size(); + if(num_elements < 2) + return false; + + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(!enqueue_task_lock.AreThreadsAvailable()) + return false; + + ConcurrencyManager concurrency_manager(this, num_elements); + + //kick off interpreters + for(size_t element_index = 0; element_index < num_elements; element_index++) + { + auto &interpreter = *concurrency_manager.interpreters[element_index]; + EvaluableNode *node_to_execute = nodes[element_index]; + + concurrency_manager.resultFutures.emplace_back( + Concurrency::threadPool.EnqueueBatchTask( + [this, &interpreter, node_to_execute, &concurrency_manager] + { + interpreter.memoryModificationLock = Concurrency::ReadLock(interpreter.evaluableNodeManager->memoryModificationMutex); + auto result = interpreter.ExecuteNode(node_to_execute, + evaluableNodeManager->AllocListNode(callStackNodes), + evaluableNodeManager->AllocListNode(interpreterNodeStackNodes), + evaluableNodeManager->AllocListNode(constructionStackNodes), + &constructionStackIndices, + concurrency_manager.GetCallStackWriteMutex()); + + evaluableNodeManager->KeepNodeReference(result); + interpreter.memoryModificationLock.unlock(); + return result; + } + ) + ); + } + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + interpreted_nodes = concurrency_manager.GetResultsAndFreeReferences(); + return true; +} + +#endif diff --git a/src/Amalgam/interpreter/Interpreter.h b/src/Amalgam/interpreter/Interpreter.h new file mode 100644 index 00000000..3dfed65f --- /dev/null +++ b/src/Amalgam/interpreter/Interpreter.h @@ -0,0 +1,821 @@ +#pragma once + +//project headers: +#include "Entity.h" +#include "EvaluableNode.h" +#include "EvaluableNodeManagement.h" +#include "FastMath.h" +#include "Parser.h" +#include "PerformanceProfiler.h" +#include "PrintListener.h" +#include "RandomStream.h" + +//system headers: +#include +#include +#include +#include +#include +#include + +//Forward declarations: +class EntityQueryCondition; + +//if defined, will instrument and profile timing for each opcode, reported when profiling is enabled +//#define INTERPRETER_PROFILE_OPCODES + +//if defined, will instrument and profile timing for each entity label called +//#define INTERPRETER_PROFILE_LABELS_CALLED + +class Interpreter +{ +public: + + //Creates a new interpreter to run code and to store labels. + // If no entity is specified via nullptr, then it will run sandboxed + // Uses max_num_steps as the maximum number of operations that can be executed by this and any subordinate operations called. If max_num_steps is 0, then it will execute unlimeted steps + // Uses max_num_nodes as the maximum number of nodes that can be allocated in memory by this and any subordinate operations called. 
If max_num_nodes is 0, then it will allow unlimited allocations + // max_num_sets is also used for any subsequently limited executions + Interpreter(EvaluableNodeManager *enm, + ExecutionCycleCount max_num_steps, size_t max_num_nodes, RandomStream rand_stream, + std::vector *write_listeners, PrintListener *print_listener, + Entity *t = nullptr, Interpreter *calling_interpreter = nullptr + ); + ~Interpreter(); + + //Executes the current Entity that this Interpreter is contained by + // sets up all of the stack and contextual structures, then calls InterpretNode on en + //if call_stack, interpreter_node_stack, or construction_stack are nullptr, it will start with a new one + //note that construction_stack and construction_stack_indices should be specified together and should be the same length +#ifdef MULTITHREAD_SUPPORT + //if run multithreaded, then for performance reasons, it is optimal to have one of each stack per thread + // and call_stack_write_mutex is the mutex needed to lock for writing + EvaluableNodeReference ExecuteNode(EvaluableNode *en, + EvaluableNode *call_stack = nullptr, EvaluableNode *interpreter_node_stack = nullptr, + EvaluableNode *construction_stack = nullptr, std::vector *construction_stack_indices = nullptr, + Concurrency::SingleMutex *call_stack_write_mutex = nullptr); +#else + EvaluableNodeReference ExecuteNode(EvaluableNode *en, + EvaluableNode *call_stack = nullptr, EvaluableNode *interpreter_node_stack = nullptr, + EvaluableNode *construction_stack = nullptr, std::vector *construction_stack_indices = nullptr); +#endif + + //changes debugging state to debugging_enabled + static void SetDebuggingState(bool debugging_enabled); + + //when debugging, checks any relevant breakpoints and update debugger state if any are triggered + // if before_opcode is true, then it is checking before it is run, otherwise it'll check after it is completed + void DebugCheckBreakpointsAndUpdateState(EvaluableNode *en, bool before_opcode); + + //collects garbage on evaluableNodeManager + __forceinline void CollectGarbage() + { +#ifdef MULTITHREAD_SUPPORT + evaluableNodeManager->CollectGarbage(&memoryModificationLock); +#else + evaluableNodeManager->CollectGarbage(); +#endif + } + + //pushes new_context on the stack; new_context should be a unique associative array, + // but if not, it will attempt to put an appropriate unique associative array on callStackNodes + __forceinline void PushNewExecutionContext(EvaluableNodeReference new_context) + { + //make sure unique assoc + if(EvaluableNode::IsAssociativeArray(new_context)) + { + if(!new_context.unique) + new_context.reference = evaluableNodeManager->AllocNode(new_context); + } + else //not assoc, make a new one + { + evaluableNodeManager->FreeNodeTreeIfPossible(new_context); + new_context.reference = evaluableNodeManager->AllocNode(ENT_ASSOC); + } + + //just in case a variable is added which needs cycle checks + new_context->SetNeedCycleCheck(true); + + callStackNodes->push_back(new_context); + } + + //pops the top execution context off the stack + __forceinline void PopExecutionContext() + { + if(callStackNodes->size() >= 1) + callStackNodes->pop_back(); + } + + //pushes a new construction context on the stack, which is assumed to not be nullptr + //the stack is indexed via the constructionStackOffset* constants + //returns the new size + static inline void PushNewConstructionContextToStack(std::vector &stack_nodes, + std::vector &stack_node_indices, + EvaluableNode *target_origin, EvaluableNode *target, 
EvaluableNodeImmediateValueWithType target_index, EvaluableNode *target_value) + { + size_t new_size = stack_nodes.size() + constructionStackOffsetStride; + stack_nodes.resize(new_size, nullptr); + + stack_nodes[new_size + constructionStackOffsetTargetOrigin] = target_origin; + stack_nodes[new_size + constructionStackOffsetTarget] = target; + stack_nodes[new_size + constructionStackOffsetTargetValue] = target_value; + + stack_node_indices.emplace_back(target_index); + } + + //pushes a new construction context on the stack + //the stack is indexed via the constructionStackOffset* constants + //returns the new size + __forceinline void PushNewConstructionContext(EvaluableNode *target_origin, EvaluableNode *target, EvaluableNodeImmediateValueWithType target_index, EvaluableNode *target_value) + { + return PushNewConstructionContextToStack(*constructionStackNodes, constructionStackIndices, target_origin, target, target_index, target_value); + } + + //pops the top construction context off the stack + inline void PopConstructionContext() + { + size_t new_size = constructionStackNodes->size(); + if(new_size > constructionStackOffsetStride) + new_size -= constructionStackOffsetStride; + else + new_size = 0; + + constructionStackNodes->resize(new_size); + + if(constructionStackIndices.size() > 0) + constructionStackIndices.pop_back(); + } + + //updates the construction index at top of the stack to the new value + __forceinline void SetTopTargetValueIndexInConstructionStack(double new_index) + { + constructionStackIndices.back() = EvaluableNodeImmediateValueWithType(new_index); + } + + __forceinline void SetTopTargetValueIndexInConstructionStack(StringInternPool::StringID new_index) + { + constructionStackIndices.back() = EvaluableNodeImmediateValueWithType(new_index); + } + + //sets the value node for the top reference on the construction stack + //used for updating the current target value + //assumes there is at least one construction stack + __forceinline void SetTopTargetValueReferenceInConstructionStack(EvaluableNode *value) + { + (*constructionStackNodes)[constructionStackNodes->size() + constructionStackOffsetTargetValue] = value; + } + + //Makes sure that args is an active associative array is proper for execution context, meaning initialized assoc and a unique reference. 
+ // Will allocate a new node appropriately if it is not + //Then wraps the args on a list which will form the execution context stack and returns that + //ensures that args is still a valid EvaluableNodeReference after the call + static EvaluableNodeReference ConvertArgsToCallStack(EvaluableNodeReference &args, EvaluableNodeManager *enm); + + //finds a pointer to the location of the symbol's pointer to value in the top of the context stack and returns a pointer to the location of the symbol's pointer to value, + // nullptr if it does not exist + // also sets call_stack_index to the level in the call stack that it was found + EvaluableNode **GetExecutionContextSymbolLocation(const StringInternPool::StringID symbol_sid, size_t &call_stack_index); + + //like the other type of GetExecutionContextSymbolLocation, but returns the EvaluableNode pointer instead of a pointer-to-a-pointer + __forceinline EvaluableNode *GetExecutionContextSymbol(const StringInternPool::StringID symbol_sid) + { + size_t call_stack_index = 0; + EvaluableNode **en_ptr = GetExecutionContextSymbolLocation(symbol_sid, call_stack_index); + if(en_ptr == nullptr) + return nullptr; + + return *en_ptr; + } + + //finds a pointer to the location of the symbol's pointer to value or creates the symbol in the top of the context stack and returns a pointer to the location of the symbol's pointer to value + // also sets call_stack_index to the level in the call stack that it was found + EvaluableNode **GetOrCreateExecutionContextSymbolLocation(const StringInternPool::StringID symbol_sid, size_t &call_stack_index); + + //creates a stack state saver for the interpreterNodeStack, which will be restored back to its previous condition when this object is destructed + __forceinline EvaluableNodeStackStateSaver CreateInterpreterNodeStackStateSaver() + { + return EvaluableNodeStackStateSaver(interpreterNodeStackNodes); + } + + //like CreateInterpreterNodeStackStateSaver, but also pushes another node on the stack + __forceinline EvaluableNodeStackStateSaver CreateInterpreterNodeStackStateSaver(EvaluableNode *en) + { + //count on C++ return value optimization to not call the destructor + return EvaluableNodeStackStateSaver(interpreterNodeStackNodes, en); + } + + //keeps the current node on the stack and calls InterpretNodeExecution + EvaluableNodeReference InterpretNode(EvaluableNode *en); + + //returns the number of steps executed since Interpreter was created + constexpr ExecutionCycleCount GetNumStepsExecuted() + { return curExecutionStep; } + + //returns the number of nodes allocated to all contained entities since Interpreter was created + constexpr size_t GetNumEntityNodesAllocated() + { return curNumExecutionNodesAllocatedToEntities; } + + //Current entity that is being interpreted upon. 
If null, then it is assumed to be running in sandboxed mode + Entity *curEntity; + + //random stream to get random numbers from + RandomStream randomStream; + + //references to listeners for writes on an Entity and prints + std::vector *writeListeners; + PrintListener *printListener; + + //where to allocate new nodes + EvaluableNodeManager *evaluableNodeManager; + + //returns the current execution context, nullptr if none + EvaluableNode *GetCurrentExecutionContext(); + + //if n is immediate, it just returns it, otherwise calls InterpretNode + __forceinline EvaluableNodeReference InterpretNodeForImmediateUse(EvaluableNode *n) + { + if(n == nullptr || n->GetIsIdempotent()) + return EvaluableNodeReference(n, false); + return InterpretNode(n); + } + + //Calls InterpretNode on n, converts to std::string and stores in value to return, then cleans up any resources used + //returns a pair: a bool indicating whether it was a valid string (and not NaS), and the string + std::pair InterpretNodeIntoStringValue(EvaluableNode *n); + + //Calls InterpretNode on n, converts to std::string and stores in value to return, then cleans up any resources used + // but if n is null, it will return an empty string + std::string InterpretNodeIntoStringValueEmptyNull(EvaluableNode *n); + + //like InterpretNodeIntoStringValue, but returns the ID only if the string already exists, otherwise it returns NOT_A_STRING_ID + StringInternPool::StringID InterpretNodeIntoStringIDValueIfExists(EvaluableNode *n); + + //like InterpretNodeIntoStringValue, but creates a reference to the string that must be destroyed, regardless of whether the string existed or not (if it did not exist, then it creates one) + StringInternPool::StringID InterpretNodeIntoStringIDValueWithReference(EvaluableNode *n); + + //Calls InterpretNode on n, converts to a string, and makes sure that the node returned is new and unique so that it can be modified + EvaluableNode *InterpretNodeIntoUniqueStringIDValueEvaluableNode(EvaluableNode *n); + + //Calls InterpretNode on n, converts to double and returns, then cleans up any resources used + double InterpretNodeIntoNumberValue(EvaluableNode *n); + + //Calls InterpretNode on n, converts to a double, and makes sure that the node returned is new and unique so that it can be modified + EvaluableNode *InterpretNodeIntoUniqueNumberValueEvaluableNode(EvaluableNode *n); + + //Calls InterpretNode on n, converts to boolean and returns, then cleans up any resources used + bool InterpretNodeIntoBoolValue(EvaluableNode *n, bool value_if_null = false); + + //Calls InterpretNode on n, converts n into a destination for an Entity, relative to curEntity. + // If invalid, destination_entity_parent will be curEntity and new_entity_id will be the empty string + //new_entity_id is an allocated string, and the caller is responsible for freeing it + void InterpretNodeIntoDestinationEntity(EvaluableNode *n, Entity *&destination_entity_parent, StringInternRef &new_entity_id); + + //traverses source based on traversal path list tpl + // If create_destination_if_necessary is set, then it will expand anything in the source as appropriate + //Returns the location of the EvaluableNode * of the destination, nullptr if it does not exist + EvaluableNode **TraverseToDestinationFromTraversalPathList(EvaluableNode **source, EvaluableNodeReference &tpl, bool create_destination_if_necessary); + + //calls InterpretNode on tpl, traverses source based on tpl. 
+ // If create_destination_if_necessary is set, then it will expand anything in the source as appropriate + //Returns the location of the EvaluableNode * of the destination, nullptr if it does not exist + __forceinline EvaluableNode **InterpretNodeIntoDestinationFromTraversalPathList(EvaluableNode **source, + EvaluableNode *tpl, bool create_destination_if_necessary) + { + EvaluableNodeReference address_list_node = InterpretNodeForImmediateUse(tpl); + EvaluableNode **destination = TraverseToDestinationFromTraversalPathList(source, address_list_node, create_destination_if_necessary); + evaluableNodeManager->FreeNodeTreeIfPossible(address_list_node); + return destination; + } + + //Interprets node_id_path_to_interpret and then attempts to find the Entity relative to curEntity. Returns nullptr if cannot find + Entity *InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(EvaluableNode *node_id_path_to_interpret); + +protected: + + //Traverses down n until it reaches the furthest-most nodes from top_node, then bubbles back up re-evaluating each node via the specified function + // Returns the (potentially) modified tree of n, modified in-place + EvaluableNode *RewriteByFunction(EvaluableNodeReference function, EvaluableNode *top_node, EvaluableNode *n, EvaluableNode::ReferenceSetType &references); + +#ifdef MULTITHREAD_SUPPORT + + //class to manage the data for concurrent execution by an interpreter + class ConcurrencyManager + { + public: + + //constructs the concurrency manager. Assumes parent_interpreter is NOT null + ConcurrencyManager(Interpreter *parent_interpreter, size_t num_elements) + { + parentInterpreter = parent_interpreter; + numElements = num_elements; + + //set up data + interpreters.reserve(numElements); + resultFutures.reserve(numElements); + + size_t max_execution_steps_per_element = 0; + if(parentInterpreter->maxNumExecutionSteps > 0) + max_execution_steps_per_element = (parentInterpreter->maxNumExecutionSteps - parentInterpreter->GetNumStepsExecuted()) / numElements; + + //set up all the interpreters + // do this as its own loop to make sure that the vector memory isn't reallocated once the threads have kicked off + for(size_t element_index = 0; element_index < numElements; element_index++) + { + //create interpreter + interpreters.emplace_back(std::make_unique(parentInterpreter->evaluableNodeManager, max_execution_steps_per_element, parentInterpreter->maxNumExecutionNodes, + parentInterpreter->randomStream.CreateOtherStreamViaRand(), + parentInterpreter->writeListeners, parentInterpreter->printListener, parentInterpreter->curEntity)); + } + + //begins concurrency over all interpreters + parentInterpreter->memoryModificationLock.unlock(); + } + + //Enqueues a concurrent task resultFutures that needs a construction stack, using the relative interpreter + // executes node_to_execute with the following parameters matching those of pushing on the construction stack + // will allocate an approrpiate node matching the type of target_index_type and target_index + void PushTaskToResultFuturesWithConstructionStack(EvaluableNode *node_to_execute, + EvaluableNode *target_origin, EvaluableNode *target, EvaluableNodeImmediateValueWithType target_index, EvaluableNode *target_value) + { + //get the interpreter corresponding to the resultFutures + Interpreter *interpreter = interpreters[resultFutures.size()].get(); + + resultFutures.emplace_back( + Concurrency::threadPool.EnqueueBatchTask( + [this, interpreter, node_to_execute, target_origin, target, target_index, 
target_value] + { + EvaluableNodeManager *enm = interpreter->evaluableNodeManager; + interpreter->memoryModificationLock = Concurrency::ReadLock(enm->memoryModificationMutex); + + //build new construction stack + EvaluableNode *construction_stack = enm->AllocListNode(parentInterpreter->constructionStackNodes); + std::vector construction_stack_indices(parentInterpreter->constructionStackIndices); + interpreter->PushNewConstructionContextToStack(construction_stack->GetOrderedChildNodes(), construction_stack_indices, target_origin, target, target_index, target_value); + + auto result = interpreter->ExecuteNode(node_to_execute, + enm->AllocListNode(parentInterpreter->callStackNodes), + enm->AllocListNode(parentInterpreter->interpreterNodeStackNodes), + construction_stack, + &construction_stack_indices, + GetCallStackWriteMutex()); + + enm->KeepNodeReference(result); + + interpreter->memoryModificationLock.unlock(); + return result; + } + ) + ); + } + + //ends concurrency from all interpreters and waits for them to finish + inline void EndConcurrency() + { + Concurrency::threadPool.CountCurrentThreadAsPaused(); + + //make sure all futures return before moving on + for(auto &future : resultFutures) + future.wait(); + + if(!parentInterpreter->AllowUnlimitedExecutionSteps()) + { + for(auto &i : interpreters) + parentInterpreter->curExecutionStep += i->curExecutionStep; + } + + Concurrency::threadPool.CountCurrentThreadAsResumed(); + + parentInterpreter->memoryModificationLock.lock(); + } + + //returns results from the futures + // assumes that each result has had KeepNodeReference called upon it, otherwise it'd have not been safe, + // so it calls FreeNodeReference on each + inline std::vector GetResultsAndFreeReferences() + { + std::vector results; + results.resize(numElements); + + //fill in results from result_futures and free references + // note that std::future becomes invalid once get is called + for(size_t i = 0; i < numElements; i++) + results[i] = resultFutures[i].get(); + + parentInterpreter->evaluableNodeManager->FreeNodeReferences(results); + + return results; + } + + //returns the relevant write mutex for the call stack + constexpr Concurrency::SingleMutex *GetCallStackWriteMutex() + { + //if there is one currently in use, use it + if(parentInterpreter->callStackWriteMutex != nullptr) + return parentInterpreter->callStackWriteMutex; + + //start a new one + return &callStackWriteMutex; + } + + //interpreters run concurrently, the size of numElements + std::vector> interpreters; + + //where results are placed, the size of numElements + std::vector> resultFutures; + + //mutex to allow only one thread to write to a call stack symbol at once + Concurrency::SingleMutex callStackWriteMutex; + + protected: + //interpreter that is running all the concurrent interpreters + Interpreter *parentInterpreter; + + //the number of elements being processed + size_t numElements; + }; + + //computes the nodes concurrently and stores the interpreted values into interpreted_nodes + // looks to parent_node to whether concurrency is enabled + //returns true if it is able to interpret the nodes concurrently + bool InterpretEvaluableNodesConcurrently(EvaluableNode *parent_node, std::vector &nodes, std::vector &interpreted_nodes); + + //returns false if this or any calling interpreter is currently running on the entity specified or if there is any active concurrency + // actively editing an entity's EvaluableNode data can cause memory errors if being accessed elsewhere, so a copy must be made + bool 
IsEntitySafeForModification(Entity *entity) + { + for(Interpreter *cur_interpreter = this; cur_interpreter != nullptr; cur_interpreter = cur_interpreter->callingInterpreter) + { + //if accessing the entity or have multiple threads, can't ensure safety + if(cur_interpreter->curEntity == entity || cur_interpreter->callStackSharedAccessStartingDepth > 0) + return false; + } + + return true; + } + +#endif + + //recalculates curNumExecutionNodes + __forceinline void UpdateCurNumExecutionNodes() + { + curNumExecutionNodes = curNumExecutionNodesAllocatedToEntities + evaluableNodeManager->GetNumberOfUsedNodes(); + } + + //if true, no limit to how long can utilize CPU + constexpr bool AllowUnlimitedExecutionSteps() + { return maxNumExecutionSteps == 0; } + + constexpr ExecutionCycleCount GetRemainingNumExecutionSteps() + { + if(curExecutionStep < maxNumExecutionSteps) + return maxNumExecutionSteps - curExecutionStep; + else //already past limit + return 0; + } + + //if true, no limit on how much memory can utilize + constexpr bool AllowUnlimitedExecutionNodes() + { return maxNumExecutionNodes == 0; } + + constexpr size_t GetRemainingNumExecutionNodes() + { + if(curNumExecutionNodes < maxNumExecutionNodes) + return maxNumExecutionNodes - curNumExecutionNodes; + else //already past limit + return 0; + } + + //returns true if there's a max number of execution steps or nodes and at least one is exhausted + constexpr bool AreExecutionResourcesExhausted() + { + if(!AllowUnlimitedExecutionSteps() && curExecutionStep >= maxNumExecutionSteps) + return true; + + if(!AllowUnlimitedExecutionNodes() && curNumExecutionNodes >= maxNumExecutionNodes) + return true; + + return false; + } + + //opcodes + //returns an EvaluableNode tree from evaluating the tree passed in (or nullptr) and associated properties in an EvaluableNodeReference + //prior to calling, en must be referenced (via KeepNodeReference, or part of an entity) so it will not be garbage collected + //further, for performance, en must be guaranteed to be a valid pointer, and not nullptr + + //built-in / system specific + EvaluableNodeReference InterpretNode_ENT_SYSTEM(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_DEFAULTS(EvaluableNode *en); + + //parsing + EvaluableNodeReference InterpretNode_ENT_PARSE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_UNPARSE(EvaluableNode *en); + + //core control + EvaluableNodeReference InterpretNode_ENT_IF(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SEQUENCE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_PARALLEL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LAMBDA(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CONCLUDE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CALL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CALL_SANDBOXED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_WHILE(EvaluableNode *en); + + //definitions + EvaluableNodeReference InterpretNode_ENT_LET(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DECLARE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASSIGN_and_ACCUM(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_RETRIEVE(EvaluableNode *en); + + //base math + EvaluableNodeReference InterpretNode_ENT_ADD(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SUBTRACT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MULTIPLY(EvaluableNode *en); + EvaluableNodeReference 
InterpretNode_ENT_DIVIDE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MODULUS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_DIGITS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_DIGITS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_FLOOR(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CEILING(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ROUND(EvaluableNode *en); + + //extended math + EvaluableNodeReference InterpretNode_ENT_EXPONENT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LOG(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_SIN(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASIN(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_COS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ACOS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TAN(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ATAN(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_SINH(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASINH(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_COSH(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ACOSH(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TANH(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ATANH(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_ERF(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TGAMMA(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LGAMMA(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_SQRT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_POW(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ABS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MAX(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MIN(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DOT_PRODUCT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GENERALIZED_DISTANCE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ENTROPY(EvaluableNode *en); + + //list manipulation + EvaluableNodeReference InterpretNode_ENT_FIRST(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TAIL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LAST(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TRUNC(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_APPEND(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SIZE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_RANGE(EvaluableNode *en); + + //transformation + EvaluableNodeReference InterpretNode_ENT_REWRITE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MAP(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_FILTER(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_WEAVE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_REDUCE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_APPLY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_REVERSE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SORT(EvaluableNode *en); + + //associative list manipulation + EvaluableNodeReference InterpretNode_ENT_INDICES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_VALUES(EvaluableNode *en); + EvaluableNodeReference 
InterpretNode_ENT_CONTAINS_INDEX(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CONTAINS_VALUE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_REMOVE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_KEEP(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASSOCIATE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ZIP(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_UNZIP(EvaluableNode *en); + + //retrieval + EvaluableNodeReference InterpretNode_ENT_GET(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_and_REPLACE(EvaluableNode *en); + + //stack and node manipulation + EvaluableNodeReference InterpretNode_ENT_TARGET(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TARGET_INDEX(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TARGET_VALUE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_STACK(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ARGS(EvaluableNode *en); + + //logic + EvaluableNodeReference InterpretNode_ENT_AND(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_OR(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_XOR(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_NOT(EvaluableNode *en); + + //equivalence + EvaluableNodeReference InterpretNode_ENT_EQUAL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_NEQUAL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LESS_and_LEQUAL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GREATER_and_GEQUAL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TYPE_EQUALS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_TYPE_NEQUALS(EvaluableNode *en); + + //simulation and operations + EvaluableNodeReference InterpretNode_ENT_RAND(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_WEIGHTED_RAND(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_RAND_SEED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_RAND_SEED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SYSTEM_TIME(EvaluableNode *en); + + //built-in constants and variables + EvaluableNodeReference InterpretNode_ENT_TRUE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_FALSE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_NULL(EvaluableNode *en); + + //data types + EvaluableNodeReference InterpretNode_ENT_LIST(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASSOC(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_NUMBER(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_STRING(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SYMBOL(EvaluableNode *en); + + //node types + EvaluableNodeReference InterpretNode_ENT_GET_TYPE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_TYPE_STRING(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_TYPE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_FORMAT(EvaluableNode *en); + + //EvaluableNode management: labels, comments, and concurrency + EvaluableNodeReference InterpretNode_ENT_GET_LABELS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_ALL_LABELS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_LABELS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ZIP_LABELS(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_GET_COMMENTS(EvaluableNode *en); + 
EvaluableNodeReference InterpretNode_ENT_SET_COMMENTS(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_GET_CONCURRENCY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_CONCURRENCY(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_GET_VALUE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_VALUE(EvaluableNode *en); + + //string + EvaluableNodeReference InterpretNode_ENT_EXPLODE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SPLIT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SUBSTR(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CONCAT(EvaluableNode *en); + + //encryption + EvaluableNodeReference InterpretNode_ENT_CRYPTO_SIGN(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CRYPTO_SIGN_VERIFY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ENCRYPT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DECRYPT(EvaluableNode *en); + + //I/O + EvaluableNodeReference InterpretNode_ENT_PRINT(EvaluableNode *en); + + //tree merging + EvaluableNodeReference InterpretNode_ENT_TOTAL_SIZE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MUTATE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_COMMONALITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_EDIT_DISTANCE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_INTERSECT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_UNION(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DIFFERENCE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MIX(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MIX_LABELS(EvaluableNode *en); + + //entity merging + EvaluableNodeReference InterpretNode_ENT_TOTAL_ENTITY_SIZE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_FLATTEN_ENTITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MUTATE_ENTITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_COMMONALITY_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_EDIT_DISTANCE_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_INTERSECT_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_UNION_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DIFFERENCE_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MIX_ENTITIES(EvaluableNode *en); + + //entity details + EvaluableNodeReference InterpretNode_ENT_GET_ENTITY_COMMENTS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_RETRIEVE_ENTITY_ROOT(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASSIGN_ENTITY_ROOTS_and_ACCUM_ENTITY_ROOTS(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_ENTITY_RAND_SEED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_ENTITY_RAND_SEED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_GET_ENTITY_ROOT_PERMISSION(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_SET_ENTITY_ROOT_PERMISSION(EvaluableNode *en); + + //entity base actions + EvaluableNodeReference InterpretNode_ENT_CREATE_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CLONE_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_MOVE_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_DESTROY_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_LOAD(EvaluableNode *en); + 
EvaluableNodeReference InterpretNode_ENT_LOAD_ENTITY_and_LOAD_PERSISTENT_ENTITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_STORE(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_STORE_ENTITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CONTAINS_ENTITY(EvaluableNode *en); + + //entity query + EvaluableNodeReference InterpretNode_ENT_CONTAINED_ENTITIES_and_COMPUTE_ON_CONTAINED_ENTITIES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_QUERY_and_COMPUTE_opcodes(EvaluableNode *en); + + //entity access + EvaluableNodeReference InterpretNode_ENT_CONTAINS_LABEL(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIRECT_ASSIGN_TO_ENTITIES_and_ACCUM_TO_ENTITIES(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_RETRIEVE_FROM_ENTITY_and_DIRECT_RETRIEVE_FROM_ENTITY(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CALL_ENTITY_and_CALL_ENTITY_GET_CHANGES(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_CALL_CONTAINER(EvaluableNode *en); + + EvaluableNodeReference InterpretNode_ENT_DEALLOCATED(EvaluableNode *en); + EvaluableNodeReference InterpretNode_ENT_NOT_A_BUILT_IN_TYPE(EvaluableNode *en); + + //override hook for debugging + EvaluableNodeReference InterpretNode_DEBUG(EvaluableNode *en); + + //ensures that there are no reachable nodes that are deallocated + void ValidateEvaluableNodeIntegrity(); + + //Current execution step - number of nodes executed + ExecutionCycleCount curExecutionStep; + + //Maximum number of execution steps by this Interpreter and anything called from it. If 0, then unlimited. + //Will terminate execution if the value is reached + ExecutionCycleCount maxNumExecutionSteps; + + //Current number of nodes created by this interpreter, to be compared to maxNumExecutionNodes + // should be the sum of curNumExecutionNodesAllocatedToEntities plus any temporary nodes + size_t curNumExecutionNodes; + + //number of nodes allocated only to entities + size_t curNumExecutionNodesAllocatedToEntities; + + //Maximum number of nodes allowed to be allocated by this Interpreter and anything called from it. If 0, then unlimited. 
+ //Will terminate execution if the value is reached + size_t maxNumExecutionNodes; + + //The current execution context; the call stack + std::vector *callStackNodes; + + //A stack (list) of the current nodes being executed + std::vector *interpreterNodeStackNodes; + + //The current construction stack, containing an interleaved array of nodes + std::vector *constructionStackNodes; + + //current index for each level of constructionStackNodes; + //note, this should always be the same size as constructionStackNodes + std::vector constructionStackIndices; + + //buffer to use as for parsing and querying conditions + std::vector conditionsBuffer; + + //the interpreter that called this one -- used for debugging + Interpreter *callingInterpreter; + +#ifdef MULTITHREAD_SUPPORT +public: + //mutex to lock the memory from the EvaluableNodeManager it is using + Concurrency::ReadLock memoryModificationLock; + +protected: + + //the depth of the call stack where multiple threads may modify the same variables + size_t callStackSharedAccessStartingDepth; + + //pointer to a mutex for writing to shared variables below callStackSharedAccessStartingDepth + //note that reading does not need to be synchronized because the writes are done with regard to pointers, + // which are an atomic operation on every major processor in the world, and even Linux core libraries are built on this assumption + Concurrency::SingleMutex *callStackWriteMutex; + + //buffer to store read locks for deep locking entities + Concurrency::ReadLocksBuffer entityReadLockBuffer; + + //buffer to store write locks for deep locking entities + Concurrency::WriteLocksBuffer entityWriteLockBuffer; + +#endif + + //opcode function pointers + // each opcode function takes in an EvaluableNode + typedef EvaluableNodeReference(Interpreter::*OpcodeFunction) (EvaluableNode *); + static std::array _opcodes; + + //opcodes that all point to debugging + // can be swapped with _opcodes + static std::array _debug_opcodes; + + //number of items in each level of the constructionStack + static constexpr int64_t constructionStackOffsetStride = 3; + + //index of each item for a given level in the constructionStack relative to the size of the stack minus the level * constructionStackOffsetStride + static constexpr int64_t constructionStackOffsetTargetOrigin = -3; + static constexpr int64_t constructionStackOffsetTarget = -2; + static constexpr int64_t constructionStackOffsetTargetValue = -1; +}; diff --git a/src/Amalgam/interpreter/InterpreterDebugger.cpp b/src/Amalgam/interpreter/InterpreterDebugger.cpp new file mode 100644 index 00000000..352eed32 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterDebugger.cpp @@ -0,0 +1,747 @@ +//project headers: +#include "Interpreter.h" + +#include "AssetManager.h" +#include "StringManipulation.h" + +//makes an array of all the same value +template +constexpr std::array make_array_of_duplicate_values(T value) +{ + std::array a{}; + for(auto &x : a) + x = value; + return a; +} + +//initially all set to point to debug +std::array Interpreter::_debug_opcodes += make_array_of_duplicate_values(&Interpreter::InterpretNode_DEBUG); + +//global static data for debugging +struct InterpreterDebugData +{ + //sets interactiveMode and handles any threading issues + //any modifications to breakpoints triggered should occur before calling this method + void EnableInteractiveMode() + { + interactiveMode = true; + #ifdef MULTITHREAD_SUPPORT + interactiveModeThread = std::this_thread::get_id(); + #endif + } + + //if true, then the user is 
interacting + bool interactiveMode = true; + +#ifdef MULTITHREAD_SUPPORT + //when interactiveMode is true, it'll keep running until + // interactiveModeThread gets its chance to run + std::thread::id interactiveModeThread = std::thread::id(); +#endif + + //labels to break on + std::vector breakLabels; + + //opcodes to break on + std::vector breakOpcodes; + + //strings containing line number followed by filename to break on + std::vector breakLineFile; + + //will run until it reaches this label, then it will clear it + std::string runUntilLabel = ""; + + //will run until it reaches the next occurance of this opcode, then it will clear it + EvaluableNodeType runUntilOpcodeType = ENT_NOT_A_BUILT_IN_TYPE; + + //will run until this opcode is reached. should only be used for opcodes that are preserved in the callstack + EvaluableNode *runUntilOpcode = nullptr; + + //will run until the call stack size is this value + size_t runUntilCallStackSize = 0; + +#ifdef MULTITHREAD_SUPPORT + //only one debugger can set this at a time + Concurrency::SingleMutex debuggingMutex; +#endif + +} _interpreter_debug_data; + +//if s is longer than max_num_chars, it modifies the string, clamping it +// at the length or newline and adding an ellipsis +void ClampSingleLineStringLength(std::string &s, size_t max_num_chars, std::string ellipsis = "...") +{ + if(max_num_chars < ellipsis.size()) + max_num_chars = ellipsis.size(); + + //throw away everything on and after at the first newline + s = s.substr(0, s.find('\n')); + + if(s.size() > max_num_chars) + { + //leave room for ellipsis + s.resize(max_num_chars - ellipsis.size()); + s += ellipsis; + } +} + +//prints the node and its comment both truncated to max_num_chars or newline +std::pair StringifyNode(EvaluableNode *en, EvaluableNodeManager *enm, size_t max_num_chars = 100) +{ + //if no comments, then can just print + if(en == nullptr || en->GetCommentsStringId() == string_intern_pool.NOT_A_STRING_ID) + { + std::string code_str = Parser::Unparse(en, enm, false, true, true); + ClampSingleLineStringLength(code_str, max_num_chars); + return std::make_pair(std::string(), code_str); + } + else //has comments, so need to thoughtfully handle showing first line of comments and appropriate amount of code + { + //get comment, and make it look like a comment + std::string comment_str; + comment_str += en->GetCommentsString(); + + //if debug sources enabled, don't clamp the line, make sure it prints out the whole filename + if(asset_manager.debugSources) + max_num_chars = std::numeric_limits::max(); + + ClampSingleLineStringLength(comment_str, max_num_chars); + + //append with code + EvaluableNode en_without_comment(en); + en_without_comment.ClearComments(); + std::string code_str = Parser::Unparse(&en_without_comment, enm, false, true, true); + ClampSingleLineStringLength(code_str, max_num_chars); + + return std::make_pair(comment_str, code_str); + } +} + +//prints the current node for a stack trace +void PrintStackNode(EvaluableNode *en, EvaluableNodeManager *enm, size_t max_num_chars = 100) +{ + auto [comment_str, node_str] = StringifyNode(en, enm); + if(!asset_manager.debugSources || comment_str == "") + { + std::cout << " opcode: " << node_str << std::endl; + } + else //comment + { + std::cout << " comment:" << comment_str << std::endl; + std::cout << " opcode: " << node_str << std::endl; + } +} + +EvaluableNodeReference Interpreter::InterpretNode_DEBUG(EvaluableNode *en) +{ + DebugCheckBreakpointsAndUpdateState(en, true); + + EvaluableNodeType cur_node_type = ENT_NULL; + 
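	//note: cur_node_type is used below to index _debug_opcodes; while debugging is enabled the two
	// opcode tables are swapped (see SetDebuggingState), so every entry of _opcodes routes here and
	// _debug_opcodes holds the original handlers that actually execute the node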
if(en != nullptr) + cur_node_type = en->GetType(); + + bool enter_interactive_mode = false; + if(_interpreter_debug_data.interactiveMode) + { + #ifdef MULTITHREAD_SUPPORT + //if the thread id to look for matches, then clear the thread id + if(_interpreter_debug_data.interactiveModeThread == std::this_thread::get_id()) + _interpreter_debug_data.interactiveModeThread = std::thread::id(); + + //if there's no thread id to look for (e.g., the thread that had the breakpoint + // broke, so all future threads should stop), then enter_interactive_mode + if(_interpreter_debug_data.interactiveModeThread == std::thread::id()) + #endif + enter_interactive_mode = true; + } + + if(!enter_interactive_mode) + { + //get corresponding opcode stored in _debug_opcodes + auto oc = _debug_opcodes[cur_node_type]; + EvaluableNodeReference retval = (this->*oc)(en); + + //check for debug after execution + DebugCheckBreakpointsAndUpdateState(en, false); + + return retval; + } + +#ifdef MULTITHREAD_SUPPORT + //only one debugger at a time + Concurrency::SingleLock lock(_interpreter_debug_data.debuggingMutex); + + //if it's no longer in interactiveMode since lock, then go back to normal execution + if(!_interpreter_debug_data.interactiveMode) + { + //don't leave the lock locked while recursing into opcodes + lock.unlock(); + + //get corresponding opcode stored in _debug_opcodes + auto oc = _debug_opcodes[cur_node_type]; + EvaluableNodeReference retval = (this->*oc)(en); + + //check for debug after execution + DebugCheckBreakpointsAndUpdateState(en, false); + + return retval; + } + + //get the current thread + std::thread::id this_thread_id = std::this_thread::get_id(); +#endif + + while(true) + { + auto entity_sid = string_intern_pool.NOT_A_STRING_ID; + if(curEntity != nullptr) + entity_sid = curEntity->GetIdStringId(); + + if(asset_manager.debugMinimal) + { + //use carriage return sequence to signify the end of transmission + #ifdef MULTITHREAD_SUPPORT + std::cout << "\r\r" << this_thread_id << " >" << std::endl; + #else + std::cout << "\r\r>" << std::endl; + #endif + } + else + { + if(entity_sid != string_intern_pool.NOT_A_STRING_ID) + std::cout << "Entity: " << string_intern_pool.GetStringFromID(entity_sid) << std::endl; + + #ifdef MULTITHREAD_SUPPORT + std::cout << "Thread: " << this_thread_id << std::endl; + #endif + + auto [comment_str, node_str] = StringifyNode(en, evaluableNodeManager); + if(comment_str == "") + { + std::cout << "Current opcode: " << node_str << std::endl; + } + else //comment + { + std::cout << "Current comment:" << comment_str << std::endl; + std::cout << "Current opcode: " << node_str << std::endl; + } + std::cout << "> "; + } + + std::string input; + std::getline(std::cin, input); + std::string command = StringManipulation::RemoveFirstWord(input); + + if(command == "help") + { + std::cout << "Debugging commands:" << std::endl; + std::cout << "help: display this message" << std::endl; + std::cout << "quit: quit the program and exit" << std::endl; + std::cout << "s: step to next opcode (step into)" << std::endl; + std::cout << "n: runs to next opcode (step over)" << std::endl; + std::cout << "f: finish current opcode (step up)" << std::endl; + std::cout << "fc: finish call (step out)" << std::endl; + std::cout << "ul label: runs until it encounters a node with label" << std::endl; + std::cout << "uo opcode: runs until it encounters a node of type opcode" << std::endl; + std::cout << "c: continues until next breakpoint" << std::endl; + std::cout << "finish: finish running the program, leaving 
debug mode, running at full speed" << std::endl; + std::cout << "bl label: toggles breakpoint at the label" << std::endl; + std::cout << "bn line_number file: toggles breakpoint at the line number for file" << std::endl; + std::cout << "bo opcode: toggles breakpoint on all occurances of opcode" << std::endl; + std::cout << "br: lists breakpoints" << std::endl; + std::cout << "stack: prints out the stack" << std::endl; + std::cout << "entities: prints out the contained entities" << std::endl; + std::cout << "entity [name]: prints out the entity specified, current entity if name omitted" << std::endl; + std::cout << "labels [name]: prints out the labels of the entity specified, current entity if name omitted" << std::endl; + std::cout << "vars: prints out the variables, grouped by each layer going up the stack" << std::endl; + std::cout << "p [var]: prints variable var" << std::endl; + std::cout << "pv [var]: prints only the value of the variable var (no comments or labels)" << std::endl; + std::cout << "pp [var]: prints only a preview of the value of the variable var (no comments or labels)" << std::endl; + std::cout << "eval [expression]: evaluates expression" << std::endl; + std::cout << "validate: validate memory integrity" << std::endl; + #ifdef MULTITHREAD_SUPPORT + std::cout << "threads: displays the current thread ids" << std::endl; + #endif + continue; + } + else if(command == "quit") + { + exit(0); + } + else if(command == "s") + { + if(EvaluableNode::IsNull(en)) + return EvaluableNodeReference::Null(); + + //exit interactive loop + break; + } + else if(command == "n") + { + _interpreter_debug_data.runUntilOpcode = en; + + //run until breakpoint + _interpreter_debug_data.interactiveMode = false; + + //exit interactive loop + break; + } + else if(command == "f" || command == "fc" || command == "ul" || command == "uo" || command == "c") + { + if(command == "f") + { + if(interpreterNodeStackNodes->size() > 0) + _interpreter_debug_data.runUntilOpcode = interpreterNodeStackNodes->back(); + } + else if(command == "fc") + { + if(callStackNodes->size() > 0) + _interpreter_debug_data.runUntilCallStackSize = callStackNodes->size() - 1; + } + else if(command == "ul") + { + //go back to prompt if not a string + if(input == "") + continue; + + _interpreter_debug_data.runUntilLabel = input; + } + else if(command == "uo") + { + _interpreter_debug_data.runUntilOpcodeType = GetEvaluableNodeTypeFromString(input, true); + + //go back to prompt if not valid type + if(_interpreter_debug_data.runUntilOpcodeType == ENT_NOT_A_BUILT_IN_TYPE) + continue; + } + + //run until breakpoint + _interpreter_debug_data.interactiveMode = false; + + //exit interactive loop + break; + } + else if(command == "finish") + { + SetDebuggingState(false); + + //get regular opcode, not the debug one + auto oc = _opcodes[cur_node_type]; + + #ifdef MULTITHREAD_SUPPORT + //unlock before executing + lock.unlock(); + #endif + + EvaluableNodeReference retval = (this->*oc)(en); + return retval; + } + else if(command == "bl") + { + if(input != "") + { + auto found = std::find(begin(_interpreter_debug_data.breakLabels), + end(_interpreter_debug_data.breakLabels), input); + if(found == end(_interpreter_debug_data.breakLabels)) + { + _interpreter_debug_data.breakLabels.push_back(input); + std::cout << "Added breakpoint for label " << input << std::endl; + } + else + { + _interpreter_debug_data.breakLabels.erase(found); + std::cout << "Removed breakpoint for label " << input << std::endl; + } + } + } + else if(command == "bn") + { + auto 
found = std::find(begin(_interpreter_debug_data.breakLineFile), + end(_interpreter_debug_data.breakLineFile), input); + if(found == end(_interpreter_debug_data.breakLineFile)) + { + _interpreter_debug_data.breakLineFile.push_back(input); + std::cout << "Added breakpoint for " << input << std::endl; + } + else + { + _interpreter_debug_data.breakLineFile.erase(found); + std::cout << "Removed breakpoint for " << input << std::endl; + } + } + else if(command == "bo") + { + auto break_opcode = GetEvaluableNodeTypeFromString(input); + if(break_opcode != ENT_NOT_A_BUILT_IN_TYPE) + { + auto found = std::find(begin(_interpreter_debug_data.breakOpcodes), + end(_interpreter_debug_data.breakOpcodes), break_opcode); + if(found == end(_interpreter_debug_data.breakOpcodes)) + { + _interpreter_debug_data.breakOpcodes.push_back(break_opcode); + std::cout << "Added breakpoint for opcode " << input << std::endl; + } + else + { + _interpreter_debug_data.breakOpcodes.erase(found); + std::cout << "Removed breakpoint for opcode " << input << std::endl; + } + } + } + else if(command == "br") + { + std::cout << "Opcodes Breakpoints:" << std::endl; + for(auto break_opcode : _interpreter_debug_data.breakOpcodes) + std::cout << " " << GetStringFromEvaluableNodeType(break_opcode) << std::endl; + + std::cout << "Label Breakpoints:" << std::endl; + for(auto &break_label : _interpreter_debug_data.breakLabels) + std::cout << " " << break_label << std::endl; + + std::cout << "Line Breakpoints:" << std::endl; + for(auto &break_line : _interpreter_debug_data.breakLineFile) + std::cout << " " << break_line << std::endl; + } + else if(command == "stack") + { + std::cout << "Construction stack:" << std::endl; + for(EvaluableNode *csn : *constructionStackNodes) + PrintStackNode(csn, evaluableNodeManager); + + std::cout << "Call stack:" << std::endl; + for(EvaluableNode *csn : *callStackNodes) + PrintStackNode(csn, evaluableNodeManager); + + std::cout << "Interpret node stack:" << std::endl; + for(EvaluableNode *insn : *interpreterNodeStackNodes) + PrintStackNode(insn, evaluableNodeManager); + } + else if(command == "entities") + { + if(curEntity != nullptr && curEntity->HasContainedEntities()) + { + for(auto &e : curEntity->GetContainedEntities()) + std::cout << " " << string_intern_pool.GetStringFromID(e->GetIdStringId()) << std::endl; + } + } + else if(command == "entity" || command == "labels") + { + if(curEntity == nullptr) + { + std::cout << "not in an entity" << std::endl; + continue; + } + + Entity *entity = curEntity; + + if(input != "") + entity = curEntity->GetContainedEntity(string_intern_pool.GetIDFromString(input)); + + if(entity == nullptr) + { + std::cout << "Entity " << input << " not found in current entity" << std::endl; + continue; + } + + if(command == "entity") + { + std::cout << entity->GetCodeAsString() << std::endl; + } + else if(command == "labels") + { + entity->IterateFunctionOverLabels([] + (StringInternPool::StringID label_sid, EvaluableNode *node) + { + std::cout << " " << string_intern_pool.GetStringFromID(label_sid) << std::endl; + }); + } + } + else if(command == "vars") + { + //find symbol by walking up the stack; each layer must be an assoc + //count down from the top, and use (i - 1) below to make this loop one-based instead of having to wrap around + for(auto i = callStackNodes->size(); i > 0; i--) + { + EvaluableNode *cur_context = (*callStackNodes)[i - 1]; + + //see if this level of the stack contains the symbol + auto &mcn = cur_context->GetMappedChildNodesReference(); + for(auto 
&[symbol_id, _] : mcn) + std::cout << " " << string_intern_pool.GetStringFromID(symbol_id) << std::endl; + } + } + else if(command == "p" || command == "pv" || command == "pp") + { + auto sid = string_intern_pool.GetIDFromString(input); + if(sid == string_intern_pool.NOT_A_STRING_ID) + { + std::cout << "string " << input << " is not currently referenced anywhere." << std::endl; + } + else //valid string + { + EvaluableNode *node = nullptr; + bool value_exists = true; + + size_t call_stack_index = 0; + EvaluableNode **en_ptr = GetExecutionContextSymbolLocation(sid, call_stack_index); + if(en_ptr != nullptr) + { + node = *en_ptr; + } + else + { + if(curEntity == nullptr) + { + std::cout << "Variable " << input << " does not exist on the stack, and there is no current entity." << std::endl; + value_exists = false; + } + + node = curEntity->GetValueAtLabel(sid, nullptr, true, true); + if(node == nullptr) + { + std::cout << "Variable " << input << " does not exist on the stack or as a label in the current entity." << std::endl; + value_exists = false; + } + } + + if(value_exists) + { + if(command == "p") + std::cout << Parser::Unparse(node, evaluableNodeManager, true, true, true) << std::endl; + else if(command == "pv") + std::cout << Parser::Unparse(node, evaluableNodeManager, true, false, true) << std::endl; + else if(command == "pp") + { + std::string var_preview = Parser::Unparse(node, evaluableNodeManager, true, false, true); + if(var_preview.size() > 1023) + var_preview.resize(1023); + std::cout << var_preview << std::endl; + } + } + } + } + else if(command == "eval") + { + SetDebuggingState(false); + EvaluableNode *to_eval = Parser::Parse(input, evaluableNodeManager); + EvaluableNodeReference result = InterpretNodeForImmediateUse(to_eval); + std::cout << Parser::Unparse(result, evaluableNodeManager, true, true, true) << std::endl; + SetDebuggingState(true); + } + else if(command == "validate") + { + ValidateEvaluableNodeIntegrity(); + std::cout << "validation completed successfully" << std::endl; + } + #ifdef MULTITHREAD_SUPPORT + else if(command == "threads") + { + auto thread_ids = Concurrency::threadPool.GetThreadIds(); + for(auto &thread_id : thread_ids) + std::cout << " " << thread_id << std::endl; + } + #endif + } + + //finish executing this opcode + +#ifdef MULTITHREAD_SUPPORT + //unlock before executing + lock.unlock(); +#endif + + //get corresponding opcode stored in _debug_opcodes + auto oc = _debug_opcodes[en->GetType()]; + EvaluableNodeReference retval = (this->*oc)(en); + + //check for debug after execution + DebugCheckBreakpointsAndUpdateState(en, false); + + return retval; +} + +void Interpreter::SetDebuggingState(bool debugging_enabled) +{ + if(debugging_enabled) + { + //skip if already debugging + if(_opcodes[0] == &Interpreter::InterpretNode_DEBUG) + return; + } + else //!debugging_enabled + { + //skip if already not debugging + if(_debug_opcodes[0] == &Interpreter::InterpretNode_DEBUG) + return; + } + + //swap debug opcodes for real ones + for(size_t i = 0; i < _opcodes.size(); i++) + std::swap(_opcodes[i], _debug_opcodes[i]); +} + +void Interpreter::DebugCheckBreakpointsAndUpdateState(EvaluableNode *en, bool before_opcode) +{ + EvaluableNodeType cur_node_type = ENT_NULL; + if(en != nullptr) + cur_node_type = en->GetType(); + + //if not interactive, check for events that could trigger interactiveMode + if(!_interpreter_debug_data.interactiveMode) + { + if(_interpreter_debug_data.runUntilOpcodeType == cur_node_type) + { + _interpreter_debug_data.runUntilOpcodeType = 
ENT_NOT_A_BUILT_IN_TYPE; + _interpreter_debug_data.EnableInteractiveMode(); + } + + //break if finished opcode + if(_interpreter_debug_data.runUntilOpcode == en) + { + _interpreter_debug_data.runUntilOpcode = nullptr; + _interpreter_debug_data.EnableInteractiveMode(); + } + + if(_interpreter_debug_data.runUntilCallStackSize == callStackNodes->size()) + { + _interpreter_debug_data.runUntilCallStackSize = 0; + _interpreter_debug_data.EnableInteractiveMode(); + } + + for(auto boc : _interpreter_debug_data.breakOpcodes) + { + if(cur_node_type == boc) + _interpreter_debug_data.EnableInteractiveMode(); + } + + //only do line breakpoints before hitting an opcode + if(asset_manager.debugSources && before_opcode + && _interpreter_debug_data.breakLineFile.size() > 0) + { + //if it has a source, check against all of the source break points + std::string comment_str = en->GetCommentsString(); + if(comment_str.rfind(Parser::sourceCommentPrefix, 0) != std::string::npos) + { + for(auto &breakpoint_str : _interpreter_debug_data.breakLineFile) + { + //start comment after the prefix + size_t comment_pos = Parser::sourceCommentPrefix.size(); + size_t breakpoint_pos = 0; + + bool breakpoint_match = true; + + //check if line numbers are the same up until the space + for(; comment_pos < comment_str.size() && breakpoint_pos < breakpoint_str.size(); comment_pos++, breakpoint_pos++) + { + //stop if both are done with line number + if(comment_str[comment_pos] == ' ' && breakpoint_str[breakpoint_pos] == ' ') + { + comment_pos++; + breakpoint_pos++; + break; + } + + if(comment_str[comment_pos] != breakpoint_str[breakpoint_pos]) + { + breakpoint_match = false; + break; + } + } + + //can't proceed if line numbers don't match + if(comment_pos == comment_str.size() || !breakpoint_match) + continue; + + //skip over column number in comment + for(; comment_pos < comment_str.size(); comment_pos++) + { + if(comment_str[comment_pos] == ' ') + { + comment_pos++; + break; + } + + //make sure column only consists of number characters; fail if improper format + if(comment_str[comment_pos] > '9' || comment_str[comment_pos] < '0') + { + breakpoint_match = false; + break; + } + } + + //can't proceed if column numbers isn't valid + if(comment_pos == comment_str.size() || !breakpoint_match) + continue; + + //iterate until have reach end of both or found a non-match + for(; comment_pos < comment_str.size() && breakpoint_pos < breakpoint_str.size(); comment_pos++, breakpoint_pos++) + { + //if either line is done, then stop + bool comment_newline = (comment_str[comment_pos] == '\r' || comment_str[comment_pos] == '\n'); + bool breakpoint_line_newline = (breakpoint_str[breakpoint_pos] == '\r' || breakpoint_str[breakpoint_pos] == '\n'); + if(comment_newline || breakpoint_line_newline) + break; + + if(comment_str[comment_pos] != breakpoint_str[breakpoint_pos]) + { + breakpoint_match = false; + break; + } + } + + //make sure both comment and breakpoint line are complete (so that one isn't just a partial match of the other)s + bool comment_complete = (comment_pos == comment_str.size() + || comment_str[comment_pos] == '\r' || comment_str[comment_pos] == '\n'); + bool breakpoint_line_complete = (breakpoint_pos == breakpoint_str.size() + || breakpoint_str[breakpoint_pos] == '\r' || breakpoint_str[breakpoint_pos] == '\n'); + + if(breakpoint_match && comment_complete && breakpoint_line_complete) + { + _interpreter_debug_data.EnableInteractiveMode(); + //don't need to check any more breakpoints + break; + } + } + } + } + + //if breaking on a label + 
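	// the single-shot runUntilLabel and each persistent entry in breakLabels are compared by
	// string id against every label on the node about to be executed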
if(_interpreter_debug_data.runUntilLabel != "" || _interpreter_debug_data.breakLabels.size() > 0) + { + size_t num_labels = 0; + if(en != nullptr) + num_labels = en->GetNumLabels(); + + if(num_labels > 0) + { + //check each label to see if matches + auto run_until_label_sid = string_intern_pool.GetIDFromString(_interpreter_debug_data.runUntilLabel); + + for(size_t i = 0; i < num_labels; i++) + { + auto label_sid = en->GetLabelStringId(i); + if(label_sid == run_until_label_sid) + { + //re-enter interactiveMode and clear runUntilLabel + _interpreter_debug_data.runUntilLabel = ""; + _interpreter_debug_data.EnableInteractiveMode(); + break; + } + + //iterate over all break labels + for(auto &label : _interpreter_debug_data.breakLabels) + { + auto break_label_sid = string_intern_pool.GetIDFromString(label); + if(label_sid == break_label_sid) + { + //re-enter interactiveMode + _interpreter_debug_data.EnableInteractiveMode(); + break; + } + } + } + } + } + } +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp b/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp new file mode 100644 index 00000000..a535725a --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesBase.cpp @@ -0,0 +1,1872 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "Concurrency.h" +#include "Cryptography.h" +#include "DateTimeFormat.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "EvaluableNodeTreeFunctions.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//Used only for deep debugging of entity memory and garbage collection +std::string GetEntityMemorySizeDiagnostics(Entity *e) +{ + if(e == nullptr) + return ""; + + static CompactHashMap entity_core_allocs; + static CompactHashMap entity_temp_unused; + + //initialize to zero if not already in the list + auto prev_used = entity_core_allocs.insert(std::make_pair(e, 0)); + auto prev_unused = entity_temp_unused.insert(std::make_pair(e, 0)); + + size_t cur_used = e->evaluableNodeManager.GetNumberOfUsedNodes(); + size_t cur_unused = e->evaluableNodeManager.GetNumberOfUnusedNodes(); + + std::string result; + + if(cur_used > prev_used.first->second || cur_unused > prev_unused.first->second) + { + result += e->GetId() + " (used, free): " + EvaluableNode::NumberToString(cur_used - prev_used.first->second) + ", " + + EvaluableNode::NumberToString(cur_unused - prev_unused.first->second) + "\n"; + + prev_used.first->second = cur_used; + prev_unused.first->second = cur_unused; + } + + for(auto entity : e->GetContainedEntities()) + result += GetEntityMemorySizeDiagnostics(entity); + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SYSTEM(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::string command = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + + if(writeListeners != nullptr) + { + for(auto &wl : *writeListeners) + wl->LogSystemCall(ocn[0]); + } + + if(command == "exit") + { + exit(0); + } + else if(command == "readline") + { + std::string input; + std::getline(std::cin, input); + + //exit if have no more input + if(std::cin.bad() || std::cin.eof()) + exit(0); + + return 
EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, input), true); + } + else if(command == "printline" && ocn.size() > 1) + { + std::string output = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + printListener->LogPrint(output); + printListener->FlushLogFile(); + return EvaluableNodeReference::Null(); + } + else if(command == "cwd") + { + //if no parameter specified, return the directory + if(ocn.size() == 1) + { + auto path = std::filesystem::current_path(); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, path.string()), true); + } + + std::string directory = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + std::filesystem::path path(directory); + + //try to set the directory + std::error_code error; + std::filesystem::current_path(directory, error); + if(error) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + else + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); + } + else if(command == "system" && ocn.size() > 1) + { + std::string sys_command = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + + bool successful_run = false; + int exit_code = 0; + std::string stdout_data = Platform_RunSystemCommand(sys_command, successful_run, exit_code); + + if(!successful_run) + return EvaluableNodeReference::Null(); + + EvaluableNode *list = evaluableNodeManager->AllocNode(ENT_LIST); + list->AppendOrderedChildNode(evaluableNodeManager->AllocNode(static_cast(exit_code))); + list->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, stdout_data)); + + return EvaluableNodeReference(list, true); + } + else if(command == "os") + { + std::string os = Platform_GetOperatingSystemName(); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, os), true); + } + else if(command == "sleep") + { + std::chrono::microseconds sleep_time_usec(1); + if(ocn.size() > 1) + { + double sleep_time_sec = InterpretNodeIntoNumberValue(ocn[1]); + sleep_time_usec = std::chrono::microseconds(static_cast(1000000.0 * sleep_time_sec)); + } + + Platform_EnsurePreciseTiming(); + std::this_thread::sleep_for(sleep_time_usec); + } + else if(command == "version") + { + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, AMALGAM_VERSION_STRING), true); + } + else if(command == "est_mem_reserved") + { + return EvaluableNodeReference(evaluableNodeManager->AllocNode(static_cast(curEntity->GetEstimatedReservedDeepSizeInBytes())), true); + } + else if(command == "est_mem_used") + { + return EvaluableNodeReference(evaluableNodeManager->AllocNode(static_cast(curEntity->GetEstimatedUsedDeepSizeInBytes())), true); + } + else if(command == "mem_diagnostics") + { + + #ifdef MULTITHREAD_SUPPORT + auto lock = curEntity->CreateEntityLock(); + #endif + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, GetEntityMemorySizeDiagnostics(curEntity)), true); + } + else if(command == "rand" && ocn.size() > 1) + { + double num_bytes_raw = InterpretNodeIntoNumberValue(ocn[1]); + size_t num_bytes = 0; + if(num_bytes_raw > 0) + num_bytes = static_cast(num_bytes_raw); + + std::string rand_data(num_bytes, '\0'); + Platform_GenerateSecureRandomData(&rand_data[0], num_bytes); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, rand_data), true); + } + else if(command == "sign_key_pair") + { + auto [public_key, secret_key] = GenerateSignatureKeyPair(); + EvaluableNode *list = evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, 2); + auto 
&list_ocn = list->GetOrderedChildNodes(); + list_ocn[0]->SetStringValue(public_key); + list_ocn[1]->SetStringValue(secret_key); + + return EvaluableNodeReference(list, true); + + } + else if(command == "encrypt_key_pair") + { + auto [public_key, secret_key] = GenerateEncryptionKeyPair(); + EvaluableNode *list = evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, 2); + auto &list_ocn = list->GetOrderedChildNodes(); + list_ocn[0]->SetStringValue(public_key); + list_ocn[1]->SetStringValue(secret_key); + + return EvaluableNodeReference(list, true); + } + else if(command == "built_in_data") + { + uint8_t built_in_data[] = AMALGAM_BUILT_IN_DATA; + std::string built_in_data_s(reinterpret_cast(&built_in_data[0]), sizeof(built_in_data)); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, built_in_data_s), true); + } + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_DEFAULTS(EvaluableNode *en) +{ + if(en->GetOrderedChildNodes().size() == 0) + return EvaluableNodeReference::Null(); + //get the string key + std::string key = InterpretNodeIntoStringValueEmptyNull(en->GetOrderedChildNodes()[0]); + + if(key == "mutation_opcodes") + { + EvaluableNode *out_node = evaluableNodeManager->AllocNode(ENT_ASSOC); + out_node->ReserveMappedChildNodes(EvaluableNodeTreeManipulation::evaluableNodeTypeProbabilities.size()); + for(auto &[node_type, node_prob] : EvaluableNodeTreeManipulation::evaluableNodeTypeProbabilities) + { + EvaluableNode *num_node = evaluableNodeManager->AllocNode(ENT_NUMBER); + num_node->SetNumberValue(node_prob); + + const std::string &node_type_string = GetStringFromEvaluableNodeType(node_type, true); + out_node->SetMappedChildNode(node_type_string, num_node); + } + + return EvaluableNodeReference(out_node, true); + } + + if(key == "mutation_types") + { + EvaluableNode *out_node = evaluableNodeManager->AllocNode(ENT_ASSOC); + out_node->ReserveMappedChildNodes(EvaluableNodeTreeManipulation::mutationOperationTypeProbabilities.size()); + for(auto &[op_type, op_prob] : EvaluableNodeTreeManipulation::mutationOperationTypeProbabilities) + { + EvaluableNode *num_node = evaluableNodeManager->AllocNode(ENT_NUMBER); + num_node->SetNumberValue(op_prob); + out_node->SetMappedChildNode(op_type, num_node); + } + + return EvaluableNodeReference(out_node, true); + } + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_PARSE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + //get the string to parse + auto [valid_string, to_parse] = InterpretNodeIntoStringValue(ocn[0]); + if(!valid_string) + return EvaluableNodeReference::Null(); + + return Parser::Parse(to_parse, evaluableNodeManager); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_UNPARSE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + bool pretty = false; + if(ocn.size() > 1) + pretty = InterpretNodeIntoBoolValue(ocn[1]); + + bool deterministic_order = false; + if(ocn.size() > 2) + deterministic_order = InterpretNodeIntoBoolValue(ocn[2]); + + auto tree = InterpretNodeForImmediateUse(ocn[0]); + std::string s = Parser::Unparse(tree, evaluableNodeManager, pretty, true, deterministic_order); + evaluableNodeManager->FreeNodeTreeIfPossible(tree); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s), true); +} + 
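The parse and unparse opcodes above form a simple round trip: Parser::Parse turns a string of Amalgam source into a node tree owned by the node manager, and Parser::Unparse serializes a tree back to a string with optional pretty-printing and deterministic ordering. The sketch below is illustrative only and not part of this changeset; the header names and the helper name ReformatAmalgamSource are assumptions, and only the Parser::Parse, Parser::Unparse, and FreeNodeTreeIfPossible calls that appear in this diff are relied on.

//illustrative sketch: normalize a snippet of Amalgam source by parsing it and unparsing it again
#include "EvaluableNodeManagement.h"   //assumed header for EvaluableNodeManager/EvaluableNodeReference
#include "Parser.h"                    //assumed header for Parser

#include <string>

std::string ReformatAmalgamSource(std::string &source, EvaluableNodeManager *enm,
	bool pretty, bool deterministic_order)
{
	//build a node tree from the source string, allocated by enm
	EvaluableNodeReference tree = Parser::Parse(source, enm);

	//serialize it back out; the fourth argument is passed as true to match the unparse opcode above
	std::string unparsed = Parser::Unparse(tree, enm, pretty, true, deterministic_order);

	//free the temporary tree if nothing else references it, just as the unparse opcode does
	enm->FreeNodeTreeIfPossible(tree);

	return unparsed;
}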
+EvaluableNodeReference Interpreter::InterpretNode_ENT_IF(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_cn = ocn.size(); + + //step every two parameters as condition-expression pairs + for(size_t condition_num = 0; condition_num + 1 < num_cn; condition_num += 2) + { + if(InterpretNodeIntoBoolValue(ocn[condition_num])) + return InterpretNode(ocn[condition_num + 1]); + } + + //if made it here and one more condition, then it hit the last "else" branch, so exit evaluating to the else + if(num_cn & 1) + return InterpretNode(ocn[num_cn - 1]); + + //none were true + return EvaluableNodeReference::Null(); +} + +//removes the conclude node from the top of the conclusion and, if possible, will free it, saving memory +inline EvaluableNodeReference RemoveConcludeFromConclusion(EvaluableNodeReference result, EvaluableNodeManager *enm) +{ + if(result == nullptr || result->GetOrderedChildNodes().size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *conclusion = result->GetOrderedChildNodes()[0]; + enm->FreeNodeIfPossible(result); + + return EvaluableNodeReference(conclusion, result.unique); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SEQUENCE(EvaluableNode *en) +{ + EvaluableNodeReference result = EvaluableNodeReference::Null(); + for(auto &cn : en->GetOrderedChildNodes()) + { + if(result != nullptr && result->GetType() == ENT_CONCLUDE) + return RemoveConcludeFromConclusion(result, evaluableNodeManager); + + evaluableNodeManager->FreeNodeTreeIfPossible(result); + result = InterpretNode(cn); + } + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_PARALLEL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + +#ifdef MULTITHREAD_SUPPORT + if(en->GetConcurrency() && ocn.size() > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + size_t num_elements = ocn.size(); + + ConcurrencyManager concurrency_manager(this, num_elements); + + //kick off interpreters + for(size_t element_index = 0; element_index < num_elements; element_index++) + { + auto &interpreter = *concurrency_manager.interpreters[element_index]; + EvaluableNode *node_to_execute = ocn[element_index]; + + concurrency_manager.resultFutures.emplace_back( + Concurrency::threadPool.EnqueueBatchTask( + [this, &interpreter, node_to_execute, &concurrency_manager] + { + interpreter.memoryModificationLock = Concurrency::ReadLock(interpreter.evaluableNodeManager->memoryModificationMutex); + auto result = interpreter.ExecuteNode(node_to_execute, + evaluableNodeManager->AllocListNode(callStackNodes), + evaluableNodeManager->AllocListNode(interpreterNodeStackNodes), + evaluableNodeManager->AllocListNode(constructionStackNodes), + &constructionStackIndices, + concurrency_manager.GetCallStackWriteMutex()); + + interpreter.evaluableNodeManager->FreeNodeTreeIfPossible(result); + result.reference = EvaluableNodeReference::Null(); + + interpreter.memoryModificationLock.unlock(); + return result; + } + ) + ); + } + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + return EvaluableNodeReference::Null(); + } + } +#endif + + for(auto &cn :ocn) + { + auto result = InterpretNodeForImmediateUse(cn); + evaluableNodeManager->FreeNodeTreeIfPossible(result); + } + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LAMBDA(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t ocn_size = ocn.size(); + 
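	//note: by default a lambda returns its first child unevaluated; only when a second parameter is
	// present and true is the first child interpreted and the result wrapped in a fresh ENT_LAMBDA node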
if(ocn_size == 0) + { + return EvaluableNodeReference::Null(); + } + else if(ocn_size == 1 || !EvaluableNode::IsTrue(ocn[1])) + { + //if only one parameter or second parameter isn't true, just return the result + return EvaluableNodeReference(ocn[0], false); + } + else //evaluate and then wrap in a lambda + { + EvaluableNodeReference evaluated_value = InterpretNode(ocn[0]); + + //need to evaluate its parameter and return a new node encapsulating it + EvaluableNodeReference lambda(evaluableNodeManager->AllocNode(ENT_LAMBDA), true); + lambda->AppendOrderedChildNode(evaluated_value); + lambda.UpdatePropertiesBasedOnAttachedNode(evaluated_value); + + return lambda; + } +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONCLUDE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + //if no parameter, then return itself + if(ocn.size() == 0 || ocn[0] == nullptr) + return EvaluableNodeReference(en, false); + + EvaluableNodeReference conclusion_value = InterpretNode(ocn[0]); + + //need to evaluate its parameter and return a new node encapsulating it + EvaluableNodeReference conclusion(evaluableNodeManager->AllocNode(ENT_CONCLUDE), true); + conclusion->AppendOrderedChildNode(conclusion_value); + conclusion.UpdatePropertiesBasedOnAttachedNode(conclusion_value); + + return conclusion; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CALL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto function = InterpretNodeForImmediateUse(ocn[0]); + if(function == nullptr) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + if(function->GetNumLabels() > 0) + performance_profiler.StartOperation(function->GetLabel(0), evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //if have an execution context of variables specified, then use it + EvaluableNodeReference new_context = EvaluableNodeReference::Null(); + if(en->GetOrderedChildNodes().size() > 1) + new_context = InterpretNodeForImmediateUse(ocn[1]); + + PushNewExecutionContext(new_context); + + //call the code + auto retval = InterpretNode(function); + + //all finished with new context, but can't free it in case returning something + PopExecutionContext(); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + if(function->GetNumLabels() > 0) + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CALL_SANDBOXED(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto function = InterpretNodeForImmediateUse(ocn[0]); + if(function == nullptr) + return EvaluableNodeReference::Null(); + + //number of execution steps + //evaluate before context so don't need to keep/remove reference for context + ExecutionCycleCount num_steps_allowed = GetRemainingNumExecutionSteps(); + bool num_steps_allowed_specified = false; + if(ocn.size() > 2) + { + num_steps_allowed = static_cast(InterpretNodeIntoNumberValue(ocn[2])); + num_steps_allowed_specified = true; + } + + //number of execution nodes + //evaluate before context so don't need to keep/remove reference for context + size_t num_nodes_allowed = GetRemainingNumExecutionNodes(); + bool num_nodes_allowed_specified = false; + if(ocn.size() > 3) + { + num_nodes_allowed = 
static_cast(InterpretNodeIntoNumberValue(ocn[3])); + num_nodes_allowed_specified = true; + } + + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + if(function->GetNumLabels() > 0) + performance_profiler.StartOperation(function->GetLabel(0), evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //if have an execution context of variables specified, then use it + EvaluableNodeReference args = EvaluableNodeReference::Null(); + if(en->GetOrderedChildNodes().size() > 1) + args = InterpretNode(ocn[1]); + + //build execution context from parameters + EvaluableNodeReference call_stack = ConvertArgsToCallStack(args, evaluableNodeManager); + node_stack.PushEvaluableNode(call_stack); + + //compute execution limits + if(AllowUnlimitedExecutionSteps() && (!num_steps_allowed_specified || num_steps_allowed == 0)) + num_steps_allowed = 0; + else + { + //if unlimited steps are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionSteps()) + num_steps_allowed = std::min(num_steps_allowed, GetRemainingNumExecutionSteps()); + } + + if(AllowUnlimitedExecutionNodes() && (!num_nodes_allowed_specified || num_nodes_allowed == 0)) + num_nodes_allowed = 0; + else + { + #ifdef MULTITHREAD_SUPPORT + //if multiple threads, the other threads could be eating into this + num_nodes_allowed *= Concurrency::threadPool.GetNumActiveThreads(); + #endif + + //if unlimited nodes are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionNodes()) + num_nodes_allowed = std::min(num_nodes_allowed, GetRemainingNumExecutionNodes()); + } + + Interpreter sandbox(evaluableNodeManager, num_steps_allowed, num_nodes_allowed, randomStream.CreateOtherStreamViaRand(), writeListeners, printListener, nullptr); + +#ifdef MULTITHREAD_SUPPORT + //everything at this point is referenced on stacks; allow the sandbox to trigger a garbage collect without this interpreter blocking + memoryModificationLock.unlock(); + sandbox.memoryModificationLock = Concurrency::ReadLock(evaluableNodeManager->memoryModificationMutex); +#endif + + auto result = sandbox.ExecuteNode(function, call_stack); + +#ifdef MULTITHREAD_SUPPORT + //hand lock back to this interpreter + memoryModificationLock.lock(); + sandbox.memoryModificationLock.unlock(); +#endif + + curExecutionStep += sandbox.curExecutionStep; + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + if(function->GetNumLabels() > 0) + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_WHILE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference result = EvaluableNodeReference::Null(); + auto node_stack = CreateInterpreterNodeStackStateSaver(); + for(;;) + { + //keep the result before testing condition + node_stack.PushEvaluableNode(result); + bool condition_true = InterpretNodeIntoBoolValue(ocn[0]); + node_stack.PopEvaluableNode(); + + if(!condition_true) + break; + + if(AreExecutionResourcesExhausted()) + return EvaluableNodeReference::Null(); + + //run each step within the loop + for(size_t i = 1; i < ocn.size(); i++) + { + if(result != nullptr && result->GetType() == ENT_CONCLUDE) + return RemoveConcludeFromConclusion(result, evaluableNodeManager); + + evaluableNodeManager->FreeNodeTreeIfPossible(result); + result = 
InterpretNode(ocn[i]); + } + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LET(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //add new context + auto new_context = InterpretNodeForImmediateUse(ocn[0]); + PushNewExecutionContext(new_context); + + //run code + EvaluableNodeReference result = EvaluableNodeReference::Null(); + for(size_t i = 1; i < ocn.size(); i++) + { + if(result != nullptr && result->GetType() == ENT_CONCLUDE) + { + PopExecutionContext(); + return RemoveConcludeFromConclusion(result, evaluableNodeManager); + } + + //free from previous iteration + evaluableNodeManager->FreeNodeTreeIfPossible(result); + result = InterpretNode(ocn[i]); + } + + //all finished with new context, but can't free it in case returning something + PopExecutionContext(); + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DECLARE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //get the current layer of the stack + EvaluableNode *scope = GetCurrentExecutionContext(); + if(scope == nullptr) //this shouldn't happen, but just in case it does + return EvaluableNodeReference::Null(); + auto &scope_ocn = scope->GetMappedChildNodesReference(); + + //work on the node that is declaring the variables + EvaluableNode *required_vars_node = ocn[0]; + if(required_vars_node != nullptr) + { + //transform into variables if possible + EvaluableNodeReference required_vars; + + bool need_to_interpret = false; + if(required_vars_node->GetIsIdempotent()) + { + required_vars = EvaluableNodeReference(required_vars_node, false); + } + else if(required_vars_node->IsAssociativeArray()) + { + required_vars = EvaluableNodeReference(required_vars_node, false); + need_to_interpret = true; + } + else //just need to interpret + { + required_vars = InterpretNode(required_vars_node); + } + + if(required_vars != nullptr && required_vars->IsAssociativeArray()) + { + //check each of the required variables and put into the stack if appropriate + for(auto &[cn_id, cn] : required_vars->GetMappedChildNodesReference()) + { + if(need_to_interpret && cn != nullptr && !cn->GetIsIdempotent()) + { + //don't need to do anything if the variable already exists + if(scope_ocn.find(cn_id) != end(scope_ocn)) + continue; + + PushNewConstructionContext(required_vars, required_vars, EvaluableNodeImmediateValueWithType(cn_id), nullptr); + EvaluableNodeReference value = InterpretNode(cn); + PopConstructionContext(); + + scope->SetMappedChildNode(cn_id, value, false); + } + else //just insert if it doesn't exist + { + auto [inserted, node_ptr] = scope->SetMappedChildNode(cn_id, cn, false); + if(!inserted) + { + //if it can't insert the new variable because it already exists, + // then try to free the default / new value that was attempted to be assigned + if(required_vars.unique && !required_vars.GetNeedCycleCheck()) + evaluableNodeManager->FreeNodeTree(cn); + } + } + } + + //free the vars / assoc node + evaluableNodeManager->FreeNodeIfPossible(required_vars); + } + } + + //used to store the result or clear if possible + EvaluableNodeReference result = EvaluableNodeReference::Null(); + + //run code + for(size_t i = 1; i < ocn.size(); i++) + { + if(result != nullptr && result->GetType() == ENT_CONCLUDE) + return RemoveConcludeFromConclusion(result, evaluableNodeManager); + + evaluableNodeManager->FreeNodeTreeIfPossible(result); + 
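	//evaluate the next step of the declare body; only the most recent value is kept, and a conclude
	// result is unwrapped and returned by the check at the top of the loop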
result = InterpretNode(ocn[i]); + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSIGN_and_ACCUM(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_params = ocn.size(); + + if(num_params < 1) + return EvaluableNodeReference::Null(); + + //make sure there's at least an callStack to use + if(callStackNodes->size() < 1) + return EvaluableNodeReference::Null(); + + bool accum = (en->GetType() == ENT_ACCUM); + + //if only one parameter, then assume it is an assoc of variables to accum or assign + if(num_params == 1) + { + EvaluableNode *assigned_vars_node = ocn[0]; + if(assigned_vars_node == nullptr) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference assigned_vars; + bool need_to_interpret = false; + if(assigned_vars_node->GetIsIdempotent()) + { + assigned_vars = EvaluableNodeReference(assigned_vars_node, false); + } + else if(assigned_vars_node->IsAssociativeArray()) + { + assigned_vars = EvaluableNodeReference(assigned_vars_node, false); + need_to_interpret = true; + } + else //just need to interpret + { + assigned_vars = InterpretNode(assigned_vars_node); + } + + if(assigned_vars == nullptr || !assigned_vars->IsAssociativeArray()) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(assigned_vars); + + //iterate over every variable being assigned + for(auto &[cn_id, cn] : assigned_vars->GetMappedChildNodesReference()) + { + StringInternPool::StringID variable_sid = cn_id; + if(variable_sid == StringInternPool::NOT_A_STRING_ID) + continue; + + //evaluate the value + EvaluableNodeReference variable_value_node(cn, assigned_vars.unique); + if(need_to_interpret && cn != nullptr && !cn->GetIsIdempotent()) + { + PushNewConstructionContext(assigned_vars, assigned_vars, EvaluableNodeImmediateValueWithType(variable_sid), nullptr); + variable_value_node = InterpretNode(cn); + PopConstructionContext(); + } + + //retrieve the symbol + size_t destination_call_stack_index = 0; + + #ifdef MULTITHREAD_SUPPORT + //if editing a shared variable, then need to reserve the stack and re-retrieve the symbol + //note that the above call to GetOrCreateExecutionContextSymbol *is* safe with multithreading, because it will only + //modify the stack that the interpreter has unique access to, but if it returns something further up the stack, + //then it will only read and will return a pointer which could be invalid, but will obtain the pointer again below + //after the lock making sure that it is valid + GetExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + Concurrency::SingleLock lock(*callStackWriteMutex, std::defer_lock); + if(destination_call_stack_index < callStackSharedAccessStartingDepth && callStackWriteMutex != nullptr) + { + //just in case more than one instruction is trying to write at the same time, + // but one is blocking for garbage collection, + // keep checking until it can get the lock + while(!lock.try_lock()) + { + //keep the value in case collect garbage + node_stack.PushEvaluableNode(variable_value_node); + CollectGarbage(); + node_stack.PopEvaluableNode(); + } + } + #endif + + EvaluableNode **value_destination = GetOrCreateExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + + if(accum) + { + //retrieve value_destination_node + EvaluableNodeReference value_destination_node; + value_destination_node.reference = *value_destination; + value_destination_node.unique = false; + + #ifdef MULTITHREAD_SUPPORT + //if editing a shared 
variable, then need to make a copy before editing in place to prevent another thread from reading the data structure mid-edit + if(destination_call_stack_index < callStackSharedAccessStartingDepth) + value_destination_node = evaluableNodeManager->DeepAllocCopy(value_destination_node); + #endif + + variable_value_node = AccumulateEvaluableNodeIntoEvaluableNode(value_destination_node, variable_value_node, evaluableNodeManager); + } + + //assign back into the context_to_use + *value_destination = variable_value_node.reference; + } + + return EvaluableNodeReference::Null(); + } + + //using a single variable + StringInternPool::StringID variable_sid = InterpretNodeIntoStringIDValueWithReference(ocn[0]); + if(variable_sid == StringInternPool::NOT_A_STRING_ID) + return EvaluableNodeReference::Null(); + + //if only 2 params and not accumulating, then just assign/accum the destination + if(num_params == 2) + { + auto new_value = InterpretNodeForImmediateUse(ocn[1]); + + //retrieve the symbol + size_t destination_call_stack_index = 0; + + #ifdef MULTITHREAD_SUPPORT + //if editing a shared variable, then need to reserve the stack and re-retrieve the symbol + GetExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + + Concurrency::SingleLock lock(*callStackWriteMutex, std::defer_lock); + if(destination_call_stack_index < callStackSharedAccessStartingDepth && callStackWriteMutex != nullptr) + { + //just in case more than one instruction is trying to write at the same time, + // but one is blocking for garbage collection, + // keep checking until it can get the lock + while(!lock.try_lock()) + { + //keep the value in case collect garbage + auto node_stack = CreateInterpreterNodeStackStateSaver(new_value); + CollectGarbage(); + } + } + #endif + + EvaluableNode **value_destination = GetOrCreateExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + + if(accum) + { + //create destination reference + EvaluableNodeReference value_destination_node; + value_destination_node.reference = *value_destination; + value_destination_node.unique = false; + + #ifdef MULTITHREAD_SUPPORT + //if editing a shared variable, then need to make a copy before editing in place to prevent another thread from reading the data structure mid-edit + if(destination_call_stack_index < callStackSharedAccessStartingDepth) + value_destination_node = evaluableNodeManager->DeepAllocCopy(value_destination_node); + #endif + + EvaluableNodeReference variable_value_node = AccumulateEvaluableNodeIntoEvaluableNode(value_destination_node, new_value, evaluableNodeManager); + + //assign the new accumulation + *value_destination = variable_value_node.reference; + } + else + { + *value_destination = new_value; + } + } + else //more than 2, need to make a copy and fill in as appropriate + { + //get each address/value pair to replace in result + size_t replace_change_index = 1; + for(; replace_change_index + 1 < num_params; replace_change_index += 2) + { + if(AreExecutionResourcesExhausted()) + return EvaluableNodeReference::Null(); + + auto new_value = InterpretNodeForImmediateUse(ocn[replace_change_index + 1]); + auto node_stack = CreateInterpreterNodeStackStateSaver(new_value); + + EvaluableNodeReference address_list_node = InterpretNodeForImmediateUse(ocn[replace_change_index]); + + //retrieve the symbol + size_t destination_call_stack_index = 0; + + #ifdef MULTITHREAD_SUPPORT + //if editing a shared variable, then need to reserve the stack and re-retrieve the symbol + 
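	// (the first lookup only determines the call stack depth at which the symbol lives; the pointer
	// itself is re-fetched with GetOrCreateExecutionContextSymbolLocation once the write lock is held)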
GetExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + + Concurrency::SingleLock lock(*callStackWriteMutex, std::defer_lock); + if(destination_call_stack_index < callStackSharedAccessStartingDepth && callStackWriteMutex != nullptr) + { + //just in case more than one instruction is trying to write at the same time, + // but one is blocking for garbage collection, + // keep checking until it can get the lock + while(!lock.try_lock()) + { + node_stack.PushEvaluableNode(address_list_node); + CollectGarbage(); + node_stack.PopEvaluableNode(); + } + } + #endif + + EvaluableNode **value_destination = GetOrCreateExecutionContextSymbolLocation(variable_sid, destination_call_stack_index); + + //need to make a copy so that it can be dropped in directly + // this is essential as some values may be complex data structures from other entities + EvaluableNode *value_replacement = evaluableNodeManager->DeepAllocCopy(*value_destination); + + //find location to store results + EvaluableNode **copy_destination = TraverseToDestinationFromTraversalPathList(&value_replacement, address_list_node, true); + evaluableNodeManager->FreeNodeTreeIfPossible(address_list_node); + if(copy_destination == nullptr) + continue; + + if(accum) + { + //create destination reference + EvaluableNodeReference value_destination_node; + value_destination_node.reference = *copy_destination; + value_destination_node.unique = false; + + EvaluableNodeReference variable_value_node = AccumulateEvaluableNodeIntoEvaluableNode(value_destination_node, new_value, evaluableNodeManager); + + //assign the new accumulation + *copy_destination = variable_value_node.reference; + } + else + { + *copy_destination = new_value; + } + + *value_destination = value_replacement; + } + } + + string_intern_pool.DestroyStringReference(variable_sid); + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_RETRIEVE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + auto to_lookup = InterpretNodeForImmediateUse(ocn[0]); + + //get the value(s) + if(to_lookup == nullptr || IsEvaluableNodeTypeImmediate(to_lookup->GetType())) + { + StringInternPool::StringID symbol_name_sid = EvaluableNode::ToStringIDIfExists(to_lookup); + evaluableNodeManager->FreeNodeTreeIfPossible(to_lookup); + return EvaluableNodeReference(GetExecutionContextSymbol(symbol_name_sid), false); + } + else if(to_lookup->IsAssociativeArray()) + { + //need to return an assoc, so see if need to make copy + if(!to_lookup.unique) + to_lookup.reference = evaluableNodeManager->AllocNode(to_lookup); + + //overwrite values in the ordered + for(auto &[cn_id, cn] : to_lookup->GetMappedChildNodesReference()) + { + //if there are values passed in, free them to be clobbered + EvaluableNodeReference cnr(cn, to_lookup.unique); + evaluableNodeManager->FreeNodeTreeIfPossible(cnr); + + cn = GetExecutionContextSymbol(cn_id); + } + + return EvaluableNodeReference(to_lookup.reference, false); + } + else //ordered params + { + //need to return an assoc, so see if need to make copy + if(!to_lookup.unique) + to_lookup.reference = evaluableNodeManager->AllocNode(to_lookup); + + //overwrite values in the ordered + for(auto &cn : to_lookup->GetOrderedChildNodes()) + { + StringInternPool::StringID symbol_name_sid = EvaluableNode::ToStringIDIfExists(cn); + if(symbol_name_sid == StringInternPool::NOT_A_STRING_ID) + { + cn = nullptr; + continue; + } + + //if there are values passed in, 
free them to be clobbered + EvaluableNodeReference cnr(cn, to_lookup.unique); + evaluableNodeManager->FreeNodeTreeIfPossible(cnr); + + cn = GetExecutionContextSymbol(symbol_name_sid); + } + + return EvaluableNodeReference(to_lookup.reference, false); + } +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t ocn_size = ocn.size(); + + if(ocn_size < 1) + return EvaluableNodeReference::Null(); + + auto source = InterpretNodeForImmediateUse(ocn[0]); + if(ocn_size < 2 || source == nullptr) + return source; + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //if just a single index passed to get + if(ocn_size == 2) + { + EvaluableNode **target = InterpretNodeIntoDestinationFromTraversalPathList(&source.reference, ocn[1], false); + + node_stack.PopEvaluableNode(); + + if(target == nullptr) + { + evaluableNodeManager->FreeNodeTreeIfPossible(source); + return EvaluableNodeReference::Null(); + } + + return EvaluableNodeReference(*target, source.unique); //only know about the target that it has similar properties to the source + } + + //else, return a list for everything retrieved via get + EvaluableNodeReference retrieved_list(evaluableNodeManager->AllocNode(ENT_LIST), false); + retrieved_list->ReserveOrderedChildNodes(ocn_size - 1); + node_stack.PushEvaluableNode(retrieved_list); + + for(size_t param_index = 1; param_index < ocn_size; param_index++) + { + EvaluableNode **target = InterpretNodeIntoDestinationFromTraversalPathList(&source.reference, ocn[param_index], false); + if(target != nullptr) + retrieved_list->AppendOrderedChildNode(*target); + else + retrieved_list->AppendOrderedChildNode(nullptr); + } + + //if one or fewer child nodes, the append function will have set the appropriate cycle check flag, + // but if two or more nodes, then there colud be duplicate nodes + if(retrieved_list->GetNumChildNodes() > 1) + retrieved_list->SetNeedCycleCheck(true); + + return retrieved_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_and_REPLACE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto result = InterpretNode(ocn[0]); + if(result == nullptr) + return EvaluableNodeReference::Null(); + + if(!result.unique) + result = evaluableNodeManager->DeepAllocCopy(result); + + auto node_stack = CreateInterpreterNodeStackStateSaver(result); + + //get each address/value pair to replace in result + for(size_t replace_change_index = 1; replace_change_index + 1 < ocn.size(); replace_change_index += 2) + { + //find replacement location, make sure it's a valid target + EvaluableNode *previous_result = result; + EvaluableNode **copy_destination = InterpretNodeIntoDestinationFromTraversalPathList(&result.reference, ocn[replace_change_index], true); + //if the target changed, keep track of the proper reference + if(result != previous_result) + { + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(result); + } + if(copy_destination == nullptr) + continue; + + //////////////////// + //compute new value + + if(en->GetType() == ENT_SET) + { + //just in case copy_destination points to result + auto new_value = InterpretNode(ocn[replace_change_index + 1]); + + if(*copy_destination != result) //normal replacement + { + if(result.unique && !result.GetNeedCycleCheck()) + evaluableNodeManager->FreeNodeTree(*copy_destination); + *copy_destination = new_value; + } + else //replace the whole thing from the top 
+ { + node_stack.PopEvaluableNode(); + *copy_destination = new_value; + node_stack.PushEvaluableNode(result); + } + + result.UpdatePropertiesBasedOnAttachedNode(new_value); + } + else //en->GetType() == ENT_REPLACE + { + //replace copy_destination (a part of result) with the new value + auto function = InterpretNodeForImmediateUse(ocn[replace_change_index + 1]); + if(EvaluableNode::IsNull(function)) + { + (*copy_destination) = nullptr; + continue; + } + + node_stack.PushEvaluableNode(function); + PushNewConstructionContext(nullptr, result, EvaluableNodeImmediateValueWithType(), *copy_destination); + + EvaluableNodeReference new_value = InterpretNodeForImmediateUse(function); + + PopConstructionContext(); + node_stack.PopEvaluableNode(); + + if(*copy_destination != result) //normal replacement + { + (*copy_destination) = new_value; + } + else //replacing root, need to manage references to not leave stray memory + { + node_stack.PopEvaluableNode(); + result = new_value; + node_stack.PushEvaluableNode(result); + } + + //can't guarantee anything with replace + result.unique = false; + result.SetNeedCycleCheck(true); + } + } + + //if not everything coming in was unique, then one of them could have been a duplicate + //therefore, if any part isn't unique then it cannot be guaranteed to be cycle free (even though it may be) + if(!result.unique) + result.SetNeedCycleCheck(true); + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TARGET(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t depth = 0; + if(ocn.size() > 0) + { + double value = InterpretNodeIntoNumberValue(ocn[0]); + if(value >= 0) + depth = static_cast(value); + else + return EvaluableNodeReference::Null(); + } + + //make sure have a large enough stack + if(depth >= constructionStackIndices.size()) + return EvaluableNodeReference::Null(); + + size_t offset = constructionStackNodes->size() - (constructionStackOffsetStride * depth) + constructionStackOffsetTarget; + return EvaluableNodeReference( (*constructionStackNodes)[offset], false); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TARGET_INDEX(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t depth = 0; + if(ocn.size() > 0) + { + double value = InterpretNodeIntoNumberValue(ocn[0]); + if(value >= 0) + depth = static_cast(value); + else + return EvaluableNodeReference::Null(); + } + + //make sure have a large enough stack + if(depth >= constructionStackIndices.size()) + return EvaluableNodeReference::Null(); + + //depth is 1-based + size_t offset = constructionStackIndices.size() - depth - 1; + + //build the index node to return + EvaluableNode *index_node = nullptr; + EvaluableNodeImmediateValueWithType enivwt = constructionStackIndices[offset]; + if(enivwt.nodeType == ENIVT_NUMBER) + index_node = evaluableNodeManager->AllocNode(enivwt.nodeValue.number); + else if(enivwt.nodeType == ENIVT_STRING_ID) + index_node = evaluableNodeManager->AllocNode(ENT_STRING, enivwt.nodeValue.stringID); + + return EvaluableNodeReference(index_node, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TARGET_VALUE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t depth = 0; + if(ocn.size() > 0) + { + double value = InterpretNodeIntoNumberValue(ocn[0]); + if(value >= 0) + depth = static_cast(value); + else + return EvaluableNodeReference::Null(); + } + + //make sure have a large enough stack + if(depth >= constructionStackIndices.size()) + return EvaluableNodeReference::Null(); + + 
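+	//the offset below selects the target-value slot of the construction stack frame at the requested depth, counting back from the top of the stack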
size_t offset = constructionStackNodes->size() - (constructionStackOffsetStride * depth) + constructionStackOffsetTargetValue; + return EvaluableNodeReference( (*constructionStackNodes)[offset], false); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_STACK(EvaluableNode *en) +{ +#ifdef MULTITHREAD_SUPPORT + //accessing everything in the stack, so need exclusive access + Concurrency::SingleLock lock(*callStackWriteMutex, std::defer_lock); + if(callStackWriteMutex != nullptr) + { + //just in case more than one instruction is trying to write at the same time, + // but one is blocking for garbage collection, + // keep checking until it can get the lock + while(!lock.try_lock()) + CollectGarbage(); + } +#endif + + //can create this node on the stack because will be making a copy + EvaluableNode stack_top_holder(ENT_LIST); + stack_top_holder.SetNeedCycleCheck(true); + stack_top_holder.SetOrderedChildNodes(*callStackNodes); + return evaluableNodeManager->DeepAllocCopy(&stack_top_holder); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ARGS(EvaluableNode *en) +{ + size_t depth = 0; + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() > 0) + { + double value = InterpretNodeIntoNumberValue(ocn[0]); + depth = static_cast(value); + } + + //make sure have a large enough stack + if(callStackNodes->size() >= depth + 1) + return EvaluableNodeReference( (*callStackNodes)[callStackNodes->size() - (depth + 1)], false); //0 index is top of stack + else + return EvaluableNodeReference::Null(); +} + +//Generates an EvaluableNode containing a random value based on the random parameter param, using enm and random_stream +// if any part of param is preserved in the return value, then can_free_param will be set to false, otherwise it will be left alone +EvaluableNodeReference GenerateRandomValueBasedOnRandParam(EvaluableNodeReference param, EvaluableNodeManager *enm, RandomStream &random_stream, bool &can_free_param) +{ + if(param == nullptr) + return EvaluableNodeReference(enm->AllocNode(random_stream.RandFull()), true); + + auto &ocn = param->GetOrderedChildNodes(); + if(ocn.size() > 0) + { + size_t selection = random_stream.RandSize(ocn.size()); + can_free_param = false; + return EvaluableNodeReference(ocn[selection], param.unique); + } + + if(DoesEvaluableNodeTypeUseNumberData(param->GetType())) + { + double value = random_stream.RandFull() * param->GetNumberValue(); + return EvaluableNodeReference(enm->AllocNode(value), true); + } + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_RAND(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(randomStream.RandFull()), true); + + //get number to generate + bool generate_list = false; + size_t number_to_generate = 1; + if(ocn.size() >= 2) + { + double num_value = InterpretNodeIntoNumberValue(ocn[1]); + if(FastIsNaN(num_value) || num_value < 0) + return EvaluableNodeReference::Null(); + number_to_generate = static_cast(num_value); + generate_list = true; + } + //make sure not eating up too much memory + if(!AllowUnlimitedExecutionNodes() && curNumExecutionNodes + number_to_generate >= maxNumExecutionNodes) + return EvaluableNodeReference::Null(); + + //get whether it needs to be unique + bool generate_unique_values = false; + if(ocn.size() >= 3) + generate_unique_values = InterpretNodeIntoBoolValue(ocn[2]); + + //get random param + auto param = InterpretNodeForImmediateUse(ocn[0]); + + 
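+	//single value requested: generate one random value from param below; param is freed afterward unless part of it was reused in the result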
if(!generate_list) + { + bool can_free_param = true; + EvaluableNodeReference rand_value = GenerateRandomValueBasedOnRandParam(param, evaluableNodeManager, randomStream, can_free_param); + + if(can_free_param) + evaluableNodeManager->FreeNodeTreeIfPossible(param); + else + evaluableNodeManager->FreeNodeIfPossible(param); + return rand_value; + } + + if(generate_unique_values && param != nullptr && param->GetOrderedChildNodes().size() > 0) + { + //clamp to the maximum number that can possibly be generated + size_t num_elements = param->GetOrderedChildNodes().size(); + number_to_generate = std::min(number_to_generate, num_elements); + + //want to generate multiple values, so return a list + //try to reuse param if can so don't need to allocate more memory + EvaluableNodeReference retval; + bool free_param = false; + if(param.unique) + { + retval = param; + } + else + { + free_param = true; + retval = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + retval->SetOrderedChildNodes(param->GetOrderedChildNodes()); + } + + //shuffle ordered child nodes + auto &retval_ocn = retval->GetOrderedChildNodes(); + for(size_t i = 0; i < number_to_generate; i++) + { + size_t to_swap_with = randomStream.RandSize(num_elements); + std::swap(retval_ocn[i], retval_ocn[to_swap_with]); + } + + retval.UpdatePropertiesBasedOnAttachedNode(param); + + //free unneeded nodes that weren't part of the shuffle + if(param.unique && !param->GetNeedCycleCheck()) + { + if(free_param) + evaluableNodeManager->FreeNodeIfPossible(param); + + for(size_t i = number_to_generate; i < num_elements; i++) + evaluableNodeManager->FreeNodeTree(retval_ocn[i]); + } + + //get rid of unneeded extra nodes + retval->SetOrderedChildNodesSize(number_to_generate); + retval->ReleaseOrderedChildNodesExtraMemory(); + + return retval; + } + + //want to generate multiple values, so return a list + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true); + + //just generate a list of values with replacement; either generate_unique_values was not set or the distribution "always" generates unique values + retval->ReserveOrderedChildNodes(number_to_generate); + + //just get a bunch of random values with replacement + bool can_free_param = true; + for(size_t i = 0; i < number_to_generate; i++) + { + EvaluableNodeReference rand_value = GenerateRandomValueBasedOnRandParam(param, evaluableNodeManager, randomStream, can_free_param); + retval->AppendOrderedChildNode(rand_value); + retval.UpdatePropertiesBasedOnAttachedNode(rand_value); + } + + if(can_free_param) + evaluableNodeManager->FreeNodeTreeIfPossible(param); + else + { + //if used the parameters, a parameter might be used more than once + retval->SetNeedCycleCheck(true); + evaluableNodeManager->FreeNodeIfPossible(param); + } + + return retval; +} + +//given an assoc of StringID -> value representing the probability weight of each, and a random stream, it randomly selects from the assoc +// if it can't find an appropriate probability, it returns an empty string +// if normalize is true, then it will accumulate the probability and then normalize +StringInternPool::StringID GetRandomWeightedKey(EvaluableNode::AssocType &assoc, RandomStream &rs, bool normalize) +{ + double probability_target = rs.RandFull(); + double accumulated_probability = 0.0; + double total_probability = 1.0; + + if(normalize) + { + total_probability = 0; + for(auto &[_, prob] : assoc) + total_probability += std::max(0.0, EvaluableNode::ToNumber(prob, 0.0)); + + //if no probabilities, just 
choose uniformly + if(total_probability <= 0.0) + { + //find index to return + size_t index_to_return = static_cast(assoc.size() * probability_target); + + //iterate over pairs until find the index + size_t cur_index = 0; + for(auto &[prob_id, _] : assoc) + { + if(cur_index == index_to_return) + return prob_id; + + cur_index++; + } + + return StringInternPool::NOT_A_STRING_ID; + } + + if(total_probability == std::numeric_limits::infinity()) + { + //start over, count infinities + size_t inf_count = 0; + for(auto &[_, prob] : assoc) + { + if(EvaluableNode::ToNumber(prob, 0.0) == std::numeric_limits::infinity()) + inf_count++; + } + + //get the infinity to use + inf_count = static_cast(inf_count * probability_target); + + //count down until the infinite pair is found + for(auto &[prob_id, prob] : assoc) + { + if(EvaluableNode::ToNumber(prob, 0.0) == std::numeric_limits::infinity()) + { + if(inf_count == 0) + return prob_id; + inf_count--; + } + } + + //shouldn't make it here + return StringInternPool::NOT_A_STRING_ID; + } + } + + for(auto &[prob_id, prob] : assoc) + { + accumulated_probability += (EvaluableNode::ToNumber(prob, 0.0) / total_probability); + if(probability_target < accumulated_probability) + return prob_id; + } + + //probability mass didn't add up, just grab the first one with a probability greater than zero + for(auto &[prob_id, prob] : assoc) + { + if(EvaluableNode::ToNumber(prob, 0.0) > 0) + return prob_id; + } + + //nothing valid to return + return StringInternPool::NOT_A_STRING_ID; +} + +//given a vector of vector of the probability weight of each value as probability_nodes, and a random stream, it randomly selects by probability and returns the index +// if it can't find an appropriate probability, it returns the size of the probabilities list +// if normalize is true, then it will accumulate the probability and then normalize +size_t GetRandomWeightedValueIndex(std::vector &probability_nodes, RandomStream &rs, bool normalize) +{ + double probability_target = rs.RandFull(); + double accumulated_probability = 0.0; + double total_probability = 1.0; + + if(normalize) + { + total_probability = 0; + for(auto pn : probability_nodes) + total_probability += std::max(0.0, EvaluableNode::ToNumber(pn, 0.0)); + + //if no probabilities, just choose uniformly + if(total_probability <= 0.0) + return static_cast(probability_nodes.size() * probability_target); + + if(total_probability == std::numeric_limits::infinity()) + { + //start over, count infinities + size_t inf_count = 0; + for(auto pn : probability_nodes) + { + if(EvaluableNode::ToNumber(pn, 0.0) == std::numeric_limits::infinity()) + inf_count++; + } + + //get the infinity to use + inf_count = static_cast(inf_count * probability_target); + + //count down until the infinite pair is found + for(size_t index = 0; index < probability_nodes.size(); index++) + { + if(EvaluableNode::ToNumber(probability_nodes[index], 0.0) == std::numeric_limits::infinity()) + { + if(inf_count == 0) + return index; + inf_count--; + } + } + + //shouldn't make it here + return probability_nodes.size(); + } + } + + for(size_t index = 0; index < probability_nodes.size(); index++) + { + accumulated_probability += (EvaluableNode::ToNumber(probability_nodes[index], 0.0) / total_probability); + if(probability_target < accumulated_probability) + return index; + } + + //probability mass didn't add up, just grab the first one with a probability greater than zero + for(size_t index = 0; index < probability_nodes.size(); index++) + { + //make sure don't go past the 
end of the probability nodes + if(index >= probability_nodes.size()) + break; + + if(EvaluableNode::ToNumber(probability_nodes[index], 0.0) > 0) + return index; + } + + //nothing valid to return + return probability_nodes.size(); +} + +//Generates an EvaluableNode containing a random value based on the random parameter param, using enm and random_stream +// if any part of param is preserved in the return value, then can_free_param will be set to false, otherwise it will be left alone +EvaluableNodeReference GenerateWeightedRandomValueBasedOnRandParam(EvaluableNodeReference param, EvaluableNodeManager *enm, RandomStream &random_stream, bool &can_free_param) +{ + if(param == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = param->GetOrderedChildNodes(); + //need to have a value and probability list + if(ocn.size() >= 2) + { + if(ocn[0] == nullptr || ocn[1] == nullptr) + return EvaluableNodeReference::Null(); + + can_free_param = false; + size_t index = GetRandomWeightedValueIndex(ocn[1]->GetOrderedChildNodes(), random_stream, true); + auto &value_ocn = ocn[0]->GetOrderedChildNodes(); + if(index < value_ocn.size()) + return EvaluableNodeReference(value_ocn[index], param.unique); + + return EvaluableNodeReference::Null(); + } + + auto &mcn = param->GetMappedChildNodes(); + if(mcn.size() > 0) + { + StringInternPool::StringID id_selected = GetRandomWeightedKey(mcn, random_stream, true); + return EvaluableNodeReference(enm->AllocNode(ENT_STRING, id_selected), true); + } + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_WEIGHTED_RAND(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //get number to generate + bool generate_list = false; + size_t number_to_generate = 1; + if(ocn.size() >= 2) + { + double num_value = InterpretNodeIntoNumberValue(ocn[1]); + if(FastIsNaN(num_value) || num_value < 0) + return EvaluableNodeReference::Null(); + number_to_generate = static_cast(num_value); + generate_list = true; + } + //make sure not eating up too much memory + if(!AllowUnlimitedExecutionNodes() && curNumExecutionNodes + number_to_generate >= maxNumExecutionNodes) + return EvaluableNodeReference::Null(); + + //get whether it needs to be unique + bool generate_unique_values = false; + if(ocn.size() >= 3) + generate_unique_values = InterpretNodeIntoBoolValue(ocn[2]); + + //get weighted random param + auto param = InterpretNodeForImmediateUse(ocn[0]); + + if(!generate_list) + { + bool can_free_param = true; + EvaluableNodeReference rand_value = GenerateWeightedRandomValueBasedOnRandParam(param, evaluableNodeManager, randomStream, can_free_param); + + if(can_free_param) + evaluableNodeManager->FreeNodeTreeIfPossible(param); + else + evaluableNodeManager->FreeNodeIfPossible(param); + return rand_value; + } + + if(generate_unique_values) + { + auto ¶m_ocn = param->GetOrderedChildNodes(); + if(param_ocn.size() > 0) + { + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true); + + if(param_ocn.size() < 2 || param_ocn[0] == nullptr || param_ocn[1] == nullptr) + return retval; + + //clamp to the maximum number that can possibly be generated + number_to_generate = std::min(number_to_generate, param_ocn.size()); + retval->ReserveOrderedChildNodes(number_to_generate); + + //make a copy of all of the values and probabilities so they can be removed one at a time + std::vector values(param_ocn[0]->GetOrderedChildNodes()); + std::vector 
probabilities(param_ocn[1]->GetOrderedChildNodes());
+
+			for(size_t i = 0; i < number_to_generate; i++)
+			{
+				size_t index = GetRandomWeightedValueIndex(probabilities, randomStream, true);
+				if(index >= values.size())
+					break;
+
+				retval->AppendOrderedChildNode(values[index]);
+				retval.UpdatePropertiesBasedOnAttachedNode(param);
+
+				//remove the element so it won't be reselected
+				values.erase(begin(values) + index);
+				probabilities.erase(begin(probabilities) + index);
+			}
+
+			evaluableNodeManager->FreeNodeIfPossible(param);
+			return retval;
+		}
+		else if(param->GetMappedChildNodes().size() > 0)
+		{
+			//clamp to the maximum number that can possibly be generated
+			number_to_generate = std::min(number_to_generate, param->GetMappedChildNodes().size());
+
+			//want to generate multiple values, so return a list
+			EvaluableNodeReference retval(
+				evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, number_to_generate), true);
+
+			auto &retval_ocn = retval->GetOrderedChildNodes();
+
+			//make a copy of all of the probabilities so they can be removed one at a time
+			EvaluableNode::AssocType assoc(param->GetMappedChildNodes());
+
+			for(size_t i = 0; i < number_to_generate; i++)
+			{
+				StringInternPool::StringID selected_sid = GetRandomWeightedKey(assoc, randomStream, true);
+				retval_ocn[i]->SetStringID(selected_sid);
+
+				//remove the element so it won't be reselected
+				assoc.erase(selected_sid);
+			}
+
+			evaluableNodeManager->FreeNodeTreeIfPossible(param);
+			return retval;
+		}
+
+		return EvaluableNodeReference::Null();
+	}
+
+	//just generate a list of values with replacement; either generate_unique_values was not set or the distribution "always" generates unique values
+	EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true);
+	retval->ReserveOrderedChildNodes(number_to_generate);
+
+	auto &param_ocn = param->GetOrderedChildNodes();
+	//if generating many values with weighted probabilities, use fast method
+	if(param_ocn.size() > 0 && (number_to_generate > 10 || (number_to_generate > 3 && param_ocn.size() > 200)))
+	{
+		if(param_ocn.size() < 2 || param_ocn[0] == nullptr || param_ocn[1] == nullptr)
+		{
+			evaluableNodeManager->FreeNodeIfPossible(param);
+			return retval;
+		}
+
+		auto &probabilities_ocn = param_ocn[1]->GetOrderedChildNodes();
+		std::vector<double> probabilities;
+		probabilities.reserve(probabilities_ocn.size());
+		for(auto pn : probabilities_ocn)
+			probabilities.push_back(EvaluableNode::ToNumber(pn));
+
+		auto &values_ocn = param_ocn[0]->GetOrderedChildNodes();
+
+		WeightedDiscreteRandomStreamTransform wdrst(values_ocn, probabilities, true);
+		for(size_t i = 0; i < number_to_generate; i++)
+		{
+			EvaluableNode *rand_value = wdrst.WeightedDiscreteRand(randomStream);
+			retval->AppendOrderedChildNode(rand_value);
+		}
+
+		retval.unique = param.unique;
+		retval->SetNeedCycleCheck(true);
+
+		evaluableNodeManager->FreeNodeIfPossible(param);
+
+		return retval;
+	}
+
+	auto &mcn = param->GetMappedChildNodes();
+	//if generating many values with weighted probabilities, use fast method
+	if(mcn.size() > 0 && (number_to_generate > 10 || (number_to_generate > 3 && mcn.size() > 200)))
+	{
+		EvaluableNodeMappedWeightedDiscreteRandomStreamTransform wdrst(mcn, false);
+		for(size_t i = 0; i < number_to_generate; i++)
+		{
+			EvaluableNode *rand_value = evaluableNodeManager->AllocNode(ENT_STRING, wdrst.WeightedDiscreteRand(randomStream));
+			retval->AppendOrderedChildNode(rand_value);
+		}
+
+		evaluableNodeManager->FreeNodeTreeIfPossible(param);
+		return retval;
+	}
+
+	//just get a
bunch of random values with replacement + bool can_free_param = true; + for(size_t i = 0; i < number_to_generate; i++) + { + EvaluableNodeReference rand_value = GenerateWeightedRandomValueBasedOnRandParam(param, evaluableNodeManager, randomStream, can_free_param); + retval->AppendOrderedChildNode(rand_value); + retval.UpdatePropertiesBasedOnAttachedNode(rand_value); + } + + if(can_free_param) + evaluableNodeManager->FreeNodeTreeIfPossible(param); + else + { + //if used the parameters, a parameter might be used more than once + retval->SetNeedCycleCheck(true); + evaluableNodeManager->FreeNodeIfPossible(param); + } + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_RAND_SEED(EvaluableNode *en) +{ + std::string rand_state_string = randomStream.GetState(); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, rand_state_string), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_RAND_SEED(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + auto seed_node = InterpretNodeForImmediateUse(ocn[0]); + std::string seed_string; + if(seed_node != nullptr && seed_node->GetType() == ENT_STRING) + seed_string = seed_node->GetStringValue(); + else + seed_string = Parser::Unparse(seed_node, evaluableNodeManager, false, false, true); + + randomStream.SetState(seed_string); + + return seed_node; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SYSTEM_TIME(EvaluableNode *en) +{ + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::chrono::time_point tp = std::chrono::system_clock::now(); + std::chrono::system_clock::duration duration_us = std::chrono::duration_cast(tp.time_since_epoch()); + std::chrono::duration> double_duration_us = duration_us; + double sec = double_duration_us.count(); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(sec), true); +} + +//error handling + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DEALLOCATED(EvaluableNode *en) +{ + std::cout << "ERROR: attempt to use freed memory\n"; + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_NOT_A_BUILT_IN_TYPE(EvaluableNode *en) +{ + std::cout << "ERROR: encountered an invalid instruction\n"; + return EvaluableNodeReference::Null(); +} + +void Interpreter::ValidateEvaluableNodeIntegrity() +{ + for(EvaluableNode *en : *callStackNodes) + EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); + + for(EvaluableNode *en : *interpreterNodeStackNodes) + EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); + + for(EvaluableNode *en : *constructionStackNodes) + EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); + + if(curEntity != nullptr) + EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(curEntity->GetRoot()); + + auto &nodes_referenced = evaluableNodeManager->GetNodesReferenced(); + for(auto &[en, _] : nodes_referenced) + EvaluableNodeManager::ValidateEvaluableNodeTreeMemoryIntegrity(en); + + if(callingInterpreter != nullptr) + callingInterpreter->ValidateEvaluableNodeIntegrity(); +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesCodeMixing.cpp b/src/Amalgam/interpreter/InterpreterOpcodesCodeMixing.cpp new file mode 100644 index 00000000..6fbaa350 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesCodeMixing.cpp @@ -0,0 +1,657 @@ +//project headers: +#include "Interpreter.h" + 
+#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MUTATE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto to_mutate = InterpretNodeForImmediateUse(ocn[0]); + if(to_mutate == nullptr) + return EvaluableNodeReference::Null(); + auto node_stack = CreateInterpreterNodeStackStateSaver(to_mutate); + + double mutation_rate = 0.00001; + if(ocn.size() > 1) + mutation_rate = InterpretNodeIntoNumberValue(ocn[1]); + + bool ow_exists = false; + CompactHashMap opcode_weights; + if(ocn.size() > 2) + { + auto opcode_weights_node = InterpretNodeForImmediateUse(ocn[2]); + if(!EvaluableNode::IsEmptyNode(opcode_weights_node)) + { + ow_exists = true; + for(auto &[node_id, node] : opcode_weights_node->GetMappedChildNodes()) + opcode_weights[GetEvaluableNodeTypeFromStringId(node_id)] = EvaluableNode::ToNumber(node); + + evaluableNodeManager->FreeNodeTreeIfPossible(opcode_weights_node); + } + } + + bool mtw_exists = false; + CompactHashMap mutation_type_weights; + if(ocn.size() > 3) + { + auto mutation_weights_node = InterpretNodeForImmediateUse(ocn[3]); + if(!EvaluableNode::IsEmptyNode(mutation_weights_node)) + { + mtw_exists = true; + for(auto &[node_id, node] : mutation_weights_node->GetMappedChildNodes()) + mutation_type_weights[node_id] = EvaluableNode::ToNumber(node); + + evaluableNodeManager->FreeNodeTreeIfPossible(mutation_weights_node); + } + } + + //result contains the copied result which may incur replacements + EvaluableNode *result = EvaluableNodeTreeManipulation::MutateTree(this, evaluableNodeManager, to_mutate, mutation_rate, mtw_exists ? &mutation_type_weights : nullptr, ow_exists ? 
&opcode_weights : nullptr); + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + return EvaluableNodeReference(result, false); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_COMMONALITY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + bool use_string_edit_distance = false; + if(ocn.size() > 2) + use_string_edit_distance = InterpretNodeIntoBoolValue(ocn[2]); + + //calculate edit distance based commonality if string edit distance true and both args are string literals + if(use_string_edit_distance && (ocn[0]->GetType() == ENT_STRING && ocn[1]->GetType() == ENT_STRING)) + { + size_t s1_len = 0; + size_t s2_len = 0; + auto edit_distance = EvaluableNodeTreeManipulation::EditDistance(ocn[0]->GetStringValue(), ocn[1]->GetStringValue(), s1_len, s2_len); + auto commonality = static_cast(std::max(s1_len, s2_len) - edit_distance); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(commonality), true); + } + + //otherwise, treat both as nodes and calculate node commonality + auto tree1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(tree1); + + auto tree2 = InterpretNodeForImmediateUse(ocn[1]); + auto results = EvaluableNodeTreeManipulation::NumberOfSharedNodes(tree1, tree2); + + node_stack.PopEvaluableNode(); + + evaluableNodeManager->FreeNodeTreeIfPossible(tree1); + evaluableNodeManager->FreeNodeTreeIfPossible(tree2); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(results.commonality), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_EDIT_DISTANCE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + bool use_string_edit_distance = false; + if(ocn.size() > 2) + use_string_edit_distance = InterpretNodeIntoBoolValue(ocn[2]); + + //otherwise, treat both as nodes and calculate node edit distance + auto tree1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(tree1); + + auto tree2 = InterpretNodeForImmediateUse(ocn[1]); + + double edit_distance = 0.0; + //calculate string edit distance if string edit distance true and both args are string literals + if(use_string_edit_distance + && tree1 != nullptr && tree2 != nullptr + && (tree1->GetType() == ENT_STRING && tree2->GetType() == ENT_STRING)) + { + edit_distance = static_cast(EvaluableNodeTreeManipulation::EditDistance(tree1->GetStringValue(), tree2->GetStringValue())); + } + else + { + edit_distance = EvaluableNodeTreeManipulation::EditDistance(tree1, tree2); + } + + node_stack.PopEvaluableNode(); + + evaluableNodeManager->FreeNodeTreeIfPossible(tree1); + evaluableNodeManager->FreeNodeTreeIfPossible(tree2); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(edit_distance), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_INTERSECT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto n1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(n1); + + auto n2 = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PushEvaluableNode(n2); + + EvaluableNode *result = EvaluableNodeTreeManipulation::IntersectTrees(evaluableNodeManager, n1, n2); + + //both must be unique and both must be cycle free, otherwise there's a possibility of a cycle + bool cycle_free = (n1.unique && n2.unique && 
!n1.GetNeedCycleCheck() && !n2.GetNeedCycleCheck()); + //if cycle, double-check everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + return EvaluableNodeReference(result, (n1.unique && n2.unique)); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_UNION(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto n1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(n1); + + auto n2 = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PushEvaluableNode(n2); + + EvaluableNode *result = EvaluableNodeTreeManipulation::UnionTrees(evaluableNodeManager, n1, n2); + + //both must be unique and both must be cycle free, otherwise there's a possibility of a cycle + bool cycle_free = (n1.unique && n2.unique && !n1.GetNeedCycleCheck() && !n2.GetNeedCycleCheck()); + //if cycle, double-check everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + return EvaluableNodeReference(result, (n1.unique && n2.unique)); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DIFFERENCE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto n1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(n1); + + auto n2 = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PushEvaluableNode(n2); + + EvaluableNode *result = EvaluableNodeTreeDifference::DifferenceTrees(evaluableNodeManager, n1, n2); + + //both must be unique and both must be cycle free, otherwise there's a possibility of a cycle + bool cycle_free = (n1.unique && n2.unique && !n1.GetNeedCycleCheck() && !n2.GetNeedCycleCheck()); + //if cycle, double-check everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + return EvaluableNodeReference(result, (n1.unique && n2.unique)); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MIX(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + double blend2 = 0.5; //default to half + if(ocn.size() > 2) + blend2 = InterpretNodeIntoNumberValue(ocn[2]); + + double blend1 = 1.0 - blend2; //default to the remainder + if(ocn.size() > 3) + { + blend1 = InterpretNodeIntoNumberValue(ocn[3]); + //if have a third parameter, then use the fractions in order (so need to swap) + std::swap(blend1, blend2); + } + + double similar_mix_chance = 0.0; + if(ocn.size() > 4) + similar_mix_chance = InterpretNodeIntoNumberValue(ocn[4]); + + auto n1 = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(n1); + + auto n2 = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PushEvaluableNode(n2); + + EvaluableNode *result = EvaluableNodeTreeManipulation::MixTrees(randomStream.CreateOtherStreamViaRand(), + evaluableNodeManager, n1, n2, blend1, blend2, similar_mix_chance); + + //both must be unique and both must be cycle free, otherwise there's a possibility of a cycle + bool cycle_free = (n1.unique && n2.unique && !n1.GetNeedCycleCheck() && !n2.GetNeedCycleCheck()); + //if cycle, double-check everything + if(!cycle_free) + EvaluableNodeManager::UpdateFlagsForNodeTree(result); + return EvaluableNodeReference(result, (n1.unique && n2.unique)); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MIX_LABELS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + 
+	if(ocn.size() < 2)
+		return EvaluableNodeReference::Null();
+
+	double blend2 = 0.5; //default to half
+	if(ocn.size() > 2)
+		blend2 = InterpretNodeIntoNumberValue(ocn[2]);
+
+	double blend1 = 1.0 - blend2; //default to the remainder
+	if(ocn.size() > 3)
+	{
+		blend1 = InterpretNodeIntoNumberValue(ocn[3]);
+		//if have a third parameter, then use the fractions in order (so need to swap)
+		std::swap(blend1, blend2);
+	}
+
+	auto n1 = InterpretNodeForImmediateUse(ocn[0]);
+	auto node_stack = CreateInterpreterNodeStackStateSaver(n1);
+
+	auto n2 = InterpretNodeForImmediateUse(ocn[1]);
+	node_stack.PushEvaluableNode(n2);
+
+	EvaluableNode *result = EvaluableNodeTreeManipulation::MixTreesByCommonLabels(this, evaluableNodeManager, n1, n2, randomStream, blend1, blend2);
+
+	//both must be unique and both must be cycle free, otherwise there's a possibility of a cycle
+	bool cycle_free = (n1.unique && n2.unique && !n1.GetNeedCycleCheck() && !n2.GetNeedCycleCheck());
+	//if cycle, double-check everything
+	if(!cycle_free)
+		EvaluableNodeManager::UpdateFlagsForNodeTree(result);
+	return EvaluableNodeReference(result, (n1.unique && n2.unique));
+}
+
+EvaluableNodeReference Interpreter::InterpretNode_ENT_TOTAL_ENTITY_SIZE(EvaluableNode *en)
+{
+	auto &ocn = en->GetOrderedChildNodes();
+
+	if(ocn.size() < 1)
+		return EvaluableNodeReference::Null();
+
+	//get the id of the first source entity
+	Entity *source_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]);
+	if(source_entity == nullptr)
+		return EvaluableNodeReference::Null();
+
+	double size = static_cast<double>(source_entity->GetDeepSizeInNodes());
+	return EvaluableNodeReference(evaluableNodeManager->AllocNode(size), true);
+}
+
+EvaluableNodeReference Interpreter::InterpretNode_ENT_FLATTEN_ENTITY(EvaluableNode *en)
+{
+	auto &ocn = en->GetOrderedChildNodes();
+
+	if(ocn.size() < 1)
+		return EvaluableNodeReference::Null();
+
+	//get the id of the first source entity
+	Entity *source_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]);
+	if(source_entity == nullptr)
+		return EvaluableNodeReference::Null();
+
+	bool include_rand_seeds = true;
+	if(ocn.size() > 1)
+		include_rand_seeds = InterpretNodeIntoBoolValue(ocn[1]);
+
+	bool parallel_create = false;
+	if(ocn.size() > 2)
+		parallel_create = InterpretNodeIntoBoolValue(ocn[2]);
+
+#ifdef MULTITHREAD_SUPPORT
+	//TODO 10975: move this into the entity access above and lock all contained entities
+	auto read_lock = source_entity->CreateEntityLock<Concurrency::ReadLock>();
+#endif
+
+	return EntityManipulation::FlattenEntity(this, source_entity, include_rand_seeds, parallel_create);
+}
+
+EvaluableNodeReference Interpreter::InterpretNode_ENT_MUTATE_ENTITY(EvaluableNode *en)
+{
+	auto &ocn = en->GetOrderedChildNodes();
+
+	if(ocn.size() < 1)
+		return EvaluableNodeReference::Null();
+
+	//not allowed if don't have an Entity to create within
+	if(curEntity == nullptr)
+		return EvaluableNodeReference::Null();
+
+	//get the id of the first source entity
+	Entity *source_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]);
+	//need a source entity, and can't copy self!
(that could cause badness) + if(source_entity == nullptr || source_entity == curEntity) + return EvaluableNodeReference::Null(); + + //get mutation rate if applicable + double mutation_rate = 0.00001; + if(ocn.size() > 1) + mutation_rate = InterpretNodeIntoNumberValue(ocn[1]); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(ocn.size() > 2) + InterpretNodeIntoDestinationEntity(ocn[2], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + return EvaluableNodeReference::Null(); + + bool ow_exists = false; + CompactHashMap opcode_weights; + if(ocn.size() > 3) + { + auto opcode_weights_node = InterpretNodeForImmediateUse(ocn[3]); + if(!EvaluableNode::IsEmptyNode(opcode_weights_node)) + { + ow_exists = true; + for(auto &[node_id, node] : opcode_weights_node->GetMappedChildNodes()) + opcode_weights[GetEvaluableNodeTypeFromStringId(node_id)] = EvaluableNode::ToNumber(node); + + evaluableNodeManager->FreeNodeTreeIfPossible(opcode_weights_node); + } + } + + bool mtw_exists = false; + CompactHashMap mutation_type_weights; + if(ocn.size() > 4) + { + auto mutation_weights_node = InterpretNodeForImmediateUse(ocn[4]); + if(!EvaluableNode::IsEmptyNode(mutation_weights_node)) + { + mtw_exists = true; + for(auto &[node_id, node] : mutation_weights_node->GetMappedChildNodes()) + mutation_type_weights[node_id] = EvaluableNode::ToNumber(node); + + evaluableNodeManager->FreeNodeTreeIfPossible(mutation_weights_node); + } + } + + //create new entity by mutating + Entity *new_entity = EntityManipulation::MutateEntity(this, source_entity, mutation_rate, mtw_exists ? &mutation_type_weights : nullptr, ow_exists ? &opcode_weights : nullptr); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + return EvaluableNodeReference::Null(); + } + + if(destination_entity_parent == curEntity) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id), true); + else //need to return an id list + return EvaluableNodeReference(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_COMMONALITY_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity *source_entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + if(source_entity_1 == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *source_entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + if(source_entity_2 == nullptr) + return EvaluableNodeReference::Null(); + + auto commonality = EntityManipulation::NumberOfSharedNodes(source_entity_1, source_entity_2); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(commonality.commonality), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_EDIT_DISTANCE_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity 
*source_entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + if(source_entity_1 == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *source_entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + if(source_entity_2 == nullptr) + return EvaluableNodeReference::Null(); + + double edit_distance = EntityManipulation::EditDistance(source_entity_1, source_entity_2); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(edit_distance), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_INTERSECT_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity *source_entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + //need a source entity, and can't copy self! (that could cause badness) + if(source_entity_1 == nullptr || source_entity_1 == curEntity) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *source_entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + //need a source entity, and can't copy self! (that could cause badness) + if(source_entity_2 == nullptr || source_entity_2 == curEntity) + return EvaluableNodeReference::Null(); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(ocn.size() > 2) + InterpretNodeIntoDestinationEntity(ocn[2], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + return EvaluableNodeReference::Null(); + + //create new entity by merging + Entity *new_entity = EntityManipulation::IntersectEntities(this, source_entity_1, source_entity_2); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + return EvaluableNodeReference::Null(); + } + + if(destination_entity_parent == curEntity) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id), true); + else //need to return an id list + return EvaluableNodeReference(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_UNION_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity *source_entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + //need a source entity, and can't copy self! 
(that could cause badness) + if(source_entity_1 == nullptr || source_entity_1 == curEntity) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *source_entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + //need a source entity, and can't copy self! (that could cause badness) + if(source_entity_2 == nullptr || source_entity_2 == curEntity) + return EvaluableNodeReference::Null(); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(ocn.size() > 2) + InterpretNodeIntoDestinationEntity(ocn[2], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + return EvaluableNodeReference::Null(); + + //create new entity by merging + Entity *new_entity = EntityManipulation::UnionEntities(this, source_entity_1, source_entity_2); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + return EvaluableNodeReference::Null(); + } + + if(destination_entity_parent == curEntity) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id), true); + else //need to return an id list + return EvaluableNodeReference(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DIFFERENCE_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity *entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + //need a source entity, and can't copy self! (that could cause badness) + if(entity_1 == nullptr || entity_1 == curEntity) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + //need a source entity, and can't copy self! (that could cause badness) + if(entity_2 == nullptr || entity_2 == curEntity) + return EvaluableNodeReference::Null(); + + return EntityManipulation::DifferenceEntities(this, entity_1, entity_2); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MIX_ENTITIES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the first source entity + Entity *source_entity_1 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + //need a source entity, and can't copy self! (that could cause badness) + if(source_entity_1 == nullptr || source_entity_1 == curEntity) + return EvaluableNodeReference::Null(); + + //get the id of the second source entity + Entity *source_entity_2 = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[1]); + //need a source entity, and can't copy self! 
(that could cause badness) + if(source_entity_2 == nullptr || source_entity_2 == curEntity) + return EvaluableNodeReference::Null(); + + double blend2 = 0.5; //default to half + if(ocn.size() > 2) + blend2 = InterpretNodeIntoNumberValue(ocn[2]); + + double blend1 = 1.0 - blend2; //default to the remainder + if(ocn.size() > 3) + { + blend1 = InterpretNodeIntoNumberValue(ocn[3]); + //if have a third parameter, then use the fractions in order (so need to swap) + std::swap(blend1, blend2); + } + + double similar_mix_chance = 0.0; + if(ocn.size() > 4) + similar_mix_chance = InterpretNodeIntoNumberValue(ocn[4]); + + double fraction_unnamed_entities_to_mix = 0.2; + if(ocn.size() > 5) + fraction_unnamed_entities_to_mix = InterpretNodeIntoNumberValue(ocn[5]); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(ocn.size() > 6) + InterpretNodeIntoDestinationEntity(ocn[6], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + return EvaluableNodeReference::Null(); + + //create new entity by merging + Entity *new_entity = EntityManipulation::MixEntities(this, source_entity_1, source_entity_2, + blend1, blend2, similar_mix_chance, fraction_unnamed_entities_to_mix); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + return EvaluableNodeReference::Null(); + } + + if(destination_entity_parent == curEntity) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id), true); + else //need to return an id list + return EvaluableNodeReference(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity), true); +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp b/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp new file mode 100644 index 00000000..e428f8fd --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesDataTypes.cpp @@ -0,0 +1,1886 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "Cryptography.h" +#include "DateTimeFormat.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeDifference.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "FileSupportJSON.h" +#include "FileSupportYAML.h" +#include "PerformanceProfiler.h" +#include "PlatformSpecific.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TRUE(EvaluableNode *en) +{ + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_FALSE(EvaluableNode *en) +{ + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_NULL(EvaluableNode *en) +{ + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LIST(EvaluableNode *en) +{ + //if idempotent, can just return a copy without any metadata + if(en->GetIsIdempotent()) + return evaluableNodeManager->DeepAllocCopy(en, 
EvaluableNodeManager::ENMM_REMOVE_ALL);
+
+	EvaluableNodeReference new_list(evaluableNodeManager->AllocNode(ENT_LIST), true);
+
+	auto &ocn = en->GetOrderedChildNodes();
+	size_t num_nodes = ocn.size();
+	if(num_nodes > 0)
+	{
+		new_list->ReserveOrderedChildNodes(num_nodes);
+
+	#ifdef MULTITHREAD_SUPPORT
+		if(en->GetConcurrency() && num_nodes > 1)
+		{
+			auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask();
+			if(enqueue_task_lock.AreThreadsAvailable())
+			{
+				auto node_stack = CreateInterpreterNodeStackStateSaver(new_list);
+
+				ConcurrencyManager concurrency_manager(this, num_nodes);
+
+				//kick off interpreters
+				for(size_t node_index = 0; node_index < num_nodes; node_index++)
+					concurrency_manager.PushTaskToResultFuturesWithConstructionStack(ocn[node_index], en, new_list,
+						EvaluableNodeImmediateValueWithType(static_cast<double>(node_index)), nullptr);
+
+				enqueue_task_lock.Unlock();
+
+				concurrency_manager.EndConcurrency();
+
+				for(auto &value : concurrency_manager.GetResultsAndFreeReferences())
+				{
+					//add it to the list
+					new_list->AppendOrderedChildNode(value);
+					new_list.UpdatePropertiesBasedOnAttachedNode(value);
+				}
+
+				return new_list;
+			}
+		}
+	#endif
+
+		//construction stack has a reference, so no KeepNodeReference is needed for anything referenced
+		PushNewConstructionContext(en, new_list, EvaluableNodeImmediateValueWithType(0.0), nullptr);
+
+		for(size_t i = 0; i < ocn.size(); i++)
+		{
+			SetTopTargetValueIndexInConstructionStack(static_cast<double>(i));
+
+			auto value = InterpretNode(ocn[i]);
+			//add it to the list
+			new_list->AppendOrderedChildNode(value);
+			new_list.UpdatePropertiesBasedOnAttachedNode(value);
+		}
+
+		PopConstructionContext();
+	}
+
+	return new_list;
+}
+
+EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSOC(EvaluableNode *en)
+{
+	//if idempotent, can just return a copy without any metadata
+	if(en->GetIsIdempotent())
+	{
+		EvaluableNodeReference retval = evaluableNodeManager->DeepAllocCopy(en, EvaluableNodeManager::ENMM_REMOVE_ALL);
+		return retval;
+	}
+
+	//create a new assoc from the previous
+	EvaluableNodeReference new_assoc(evaluableNodeManager->AllocNode(en, EvaluableNodeManager::ENMM_REMOVE_ALL), true);
+
+	//copy of the original evaluable node's mcn
+	auto &new_mcn = new_assoc->GetMappedChildNodesReference();
+	size_t num_nodes = new_mcn.size();
+
+	if(num_nodes > 0)
+	{
+
+	#ifdef MULTITHREAD_SUPPORT
+		if(en->GetConcurrency() && num_nodes > 1)
+		{
+			auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask();
+			if(enqueue_task_lock.AreThreadsAvailable())
+			{
+				auto node_stack = CreateInterpreterNodeStackStateSaver(new_assoc);
+				ConcurrencyManager concurrency_manager(this, num_nodes);
+
+				//kick off interpreters
+				for(auto &[cn_id, cn] : new_mcn)
+					concurrency_manager.PushTaskToResultFuturesWithConstructionStack(cn, en, new_assoc, EvaluableNodeImmediateValueWithType(cn_id), nullptr);
+
+				enqueue_task_lock.Unlock();
+				concurrency_manager.EndConcurrency();
+
+				//add results to assoc
+				auto results = concurrency_manager.GetResultsAndFreeReferences();
+				//will iterate in the same order as above
+				size_t result_index = 0;
+				for(auto &[_, cn] : new_mcn)
+				{
+					auto &value = results[result_index++];
+
+					//add it to the list
+					cn = value;
+					new_assoc.UpdatePropertiesBasedOnAttachedNode(value);
+				}
+
+				return new_assoc;
+			}
+		}
+	#endif
+
+		//construction stack has a reference, so no KeepNodeReference is needed for anything referenced
+		PushNewConstructionContext(en, new_assoc,
EvaluableNodeImmediateValueWithType(StringInternPool::NOT_A_STRING_ID), nullptr); + + for(auto &[cn_id, cn] : new_mcn) + { + SetTopTargetValueIndexInConstructionStack(cn_id); + + //compute the value + EvaluableNodeReference element_result = InterpretNode(cn); + + cn = element_result; + new_assoc.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + } + + return new_assoc; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_NUMBER(EvaluableNode *en) +{ + return EvaluableNodeReference(evaluableNodeManager->AllocNode(en->GetNumberValue()), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_STRING(EvaluableNode *en) +{ + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, en->GetStringID()), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SYMBOL(EvaluableNode *en) +{ + StringInternPool::StringID sid = EvaluableNode::ToStringIDIfExists(en); + if(sid == StringInternPool::NOT_A_STRING_ID) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference value(GetExecutionContextSymbol(sid), false); + if(value != nullptr) + return value; + + //if didn't find it in the stack, try it in the labels + if(curEntity != nullptr) + return curEntity->GetValueAtLabel(sid, nullptr, true, true); + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_TYPE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto cur = InterpretNodeForImmediateUse(ocn[0]); + EvaluableNodeType type = ENT_NULL; + if(cur != nullptr) + type = cur->GetType(); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(type), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_TYPE_STRING(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto cur = InterpretNodeForImmediateUse(ocn[0]); + EvaluableNodeType type = ENT_NULL; + if(cur != nullptr) + type = cur->GetType(); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + std::string type_string = GetStringFromEvaluableNodeType(type, true); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, type_string), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_TYPE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get the target + auto source = InterpretNode(ocn[0]); + if(source == nullptr) + source = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_NULL), true); + + if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the type to set + EvaluableNodeType new_type = ENT_NULL; + auto type_node = InterpretNodeForImmediateUse(ocn[1]); + if(type_node != nullptr) + { + if(type_node->GetType() == ENT_STRING) + { + StringInternPool::StringID sid = type_node->GetStringID(); + new_type = GetEvaluableNodeTypeFromStringId(sid); + } + else + new_type = type_node->GetType(); + } + evaluableNodeManager->FreeNodeTreeIfPossible(type_node); + + if(new_type == ENT_NOT_A_BUILT_IN_TYPE) + new_type = ENT_NULL; + + source->SetType(new_type, evaluableNodeManager); + + return source; +} + +//reinterprets a char value to DestinationType +template +constexpr DestinationType 
ExpandCharStorage(char &value) +{ + return static_cast(reinterpret_cast(value)); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_FORMAT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 3) + return EvaluableNodeReference::Null(); + + StringInternPool::StringID from_type = InterpretNodeIntoStringIDValueWithReference(ocn[1]); + StringInternPool::StringID to_type = InterpretNodeIntoStringIDValueWithReference(ocn[2]); + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + bool node_stack_needs_popping = false; + + EvaluableNodeReference from_params; + if(ocn.size() > 3) + { + from_params = InterpretNodeForImmediateUse(ocn[3]); + node_stack.PushEvaluableNode(from_params); + node_stack_needs_popping = true; + } + + bool use_code = false; + EvaluableNodeReference code_value; + + bool use_number = false; + double number_value = 0; + + bool use_uint_number = false; + uint64_t uint_number_value = 0; + + bool use_int_number = false; + int64_t int_number_value = 0; + + bool use_string = false; + std::string string_value = ""; + + const std::string date_string("date:"); + + if(from_type == GetStringIdFromNodeTypeFromString(ENT_NUMBER)) + { + use_number = true; + number_value = InterpretNodeIntoNumberValue(ocn[0]); + } + else if(from_type == ENBISI_code) + { + use_code = true; + code_value = InterpretNodeForImmediateUse(ocn[0]); + } + else //base on string type + { + string_value = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + + if(from_type == GetStringIdFromNodeTypeFromString(ENT_STRING)) + { + use_string = true; + } + else if(from_type == ENBISI_Base16) + { + use_string = true; + string_value = StringManipulation::Base16ToBinaryString(string_value); + } + else if(from_type == ENBISI_Base64) + { + use_string = true; + string_value = StringManipulation::Base64ToBinaryString(string_value); + } + else if(from_type == ENBISI_uint8 || from_type == ENBISI_UINT8) + { + use_uint_number = true; + uint_number_value = reinterpret_cast(string_value[0]); + } + else if(from_type == ENBISI_int8 || from_type == ENBISI_INT8) + { + use_int_number = true; + int_number_value = ExpandCharStorage(string_value[0]); + } + else if(from_type == ENBISI_uint16) + { + use_uint_number = true; + if(string_value.size() >= 2) + uint_number_value = ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8); + } + else if(from_type == ENBISI_UINT16) + { + use_uint_number = true; + if(string_value.size() >= 2) + uint_number_value = ExpandCharStorage(string_value[1]) | (ExpandCharStorage(string_value[1]) << 8); + } + else if(from_type == ENBISI_int16) + { + use_int_number = true; + if(string_value.size() >= 2) //sign extend the most significant byte + int_number_value = ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8); + } + else if(from_type == ENBISI_INT16) + { + use_int_number = true; + if(string_value.size() >= 2) //sign extend the most significant byte + int_number_value = ExpandCharStorage(string_value[1]) | (ExpandCharStorage(string_value[0]) << 8); + } + else if(from_type == ENBISI_uint32) + { + use_uint_number = true; + if(string_value.size() >= 4) + uint_number_value = ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24); + } + else if(from_type == ENBISI_UINT32) + { + use_uint_number = true; + if(string_value.size() >= 4) + uint_number_value = ExpandCharStorage(string_value[3]) | 
(ExpandCharStorage(string_value[2]) << 8) + | (ExpandCharStorage(string_value[1]) << 16) | (ExpandCharStorage(string_value[0]) << 24); + } + else if(from_type == ENBISI_int32) + { + use_int_number = true; + if(string_value.size() >= 4) //sign extend the most significant byte + int_number_value = ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24); + } + else if(from_type == ENBISI_INT32) + { + use_int_number = true; + if(string_value.size() >= 4) //sign extend the most significant byte + int_number_value = ExpandCharStorage(string_value[3]) | (ExpandCharStorage(string_value[2]) << 8) + | (ExpandCharStorage(string_value[1]) << 16) | (ExpandCharStorage(string_value[0]) << 24); + } + else if(from_type == ENBISI_uint64) + { + use_uint_number = true; + if(string_value.size() >= 8) + uint_number_value = + ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24) + | (ExpandCharStorage(string_value[4]) << 32) | (ExpandCharStorage(string_value[5]) << 40) + | (ExpandCharStorage(string_value[6]) << 48) | (ExpandCharStorage(string_value[7]) << 56); + } + else if(from_type == ENBISI_UINT64) + { + use_uint_number = true; + if(string_value.size() >= 8) + uint_number_value = + ExpandCharStorage(string_value[7]) | (ExpandCharStorage(string_value[6]) << 8) + | (ExpandCharStorage(string_value[5]) << 16) | (ExpandCharStorage(string_value[4]) << 24) + | (ExpandCharStorage(string_value[3]) << 32) | (ExpandCharStorage(string_value[2]) << 40) + | (ExpandCharStorage(string_value[1]) << 48) | (ExpandCharStorage(string_value[0]) << 56); + } + else if(from_type == ENBISI_int64) + { + use_int_number = true; + if(string_value.size() >= 8) + { + uint_number_value = + ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24) + | (ExpandCharStorage(string_value[4]) << 32) | (ExpandCharStorage(string_value[5]) << 40) + | (ExpandCharStorage(string_value[6]) << 48) | (ExpandCharStorage(string_value[7]) << 56); + int_number_value = reinterpret_cast(uint_number_value); + } + } + else if(from_type == ENBISI_INT64) + { + use_int_number = true; + if(string_value.size() >= 8) + { + uint_number_value = + ExpandCharStorage(string_value[7]) | (ExpandCharStorage(string_value[6]) << 8) + | (ExpandCharStorage(string_value[5]) << 16) | (ExpandCharStorage(string_value[4]) << 24) + | (ExpandCharStorage(string_value[3]) << 32) | (ExpandCharStorage(string_value[2]) << 40) + | (ExpandCharStorage(string_value[1]) << 48) | (ExpandCharStorage(string_value[0]) << 56); + int_number_value = reinterpret_cast(uint_number_value); + } + } + else if(from_type == ENBISI_float) + { + use_number = true; + if(string_value.size() >= 4) + { + uint32_t temp = + ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24); + number_value = reinterpret_cast(temp); + } + } + else if(from_type == ENBISI_FLOAT) + { + use_number = true; + if(string_value.size() >= 4) + { + uint32_t temp = + ExpandCharStorage(string_value[3]) | (ExpandCharStorage(string_value[2]) << 8) + | (ExpandCharStorage(string_value[1]) << 16) | (ExpandCharStorage(string_value[0]) << 24); + number_value = reinterpret_cast(temp); + } + } + else 
if(from_type == ENBISI_double) + { + use_number = true; + if(string_value.size() >= 8) + { + uint_number_value = + ExpandCharStorage(string_value[0]) | (ExpandCharStorage(string_value[1]) << 8) + | (ExpandCharStorage(string_value[2]) << 16) | (ExpandCharStorage(string_value[3]) << 24) + | (ExpandCharStorage(string_value[4]) << 32) | (ExpandCharStorage(string_value[5]) << 40) + | (ExpandCharStorage(string_value[6]) << 48) | (ExpandCharStorage(string_value[7]) << 56); + number_value = reinterpret_cast(uint_number_value); + } + } + else if(from_type == ENBISI_DOUBLE) + { + use_number = true; + if(string_value.size() >= 8) + { + uint_number_value = + ExpandCharStorage(string_value[7]) | (ExpandCharStorage(string_value[6]) << 8) + | (ExpandCharStorage(string_value[5]) << 16) | (ExpandCharStorage(string_value[4]) << 24) + | (ExpandCharStorage(string_value[3]) << 32) | (ExpandCharStorage(string_value[2]) << 40) + | (ExpandCharStorage(string_value[1]) << 48) | (ExpandCharStorage(string_value[0]) << 56); + number_value = reinterpret_cast(uint_number_value); + } + } + else if(from_type == ENBISI_json) + { + use_code = true; + code_value = EvaluableNodeReference(EvaluableNodeJSONTranslation::JsonToEvaluableNode(evaluableNodeManager, string_value), true); + } + else if(from_type == ENBISI_yaml) + { + use_code = true; + code_value = EvaluableNodeReference(EvaluableNodeYAMLTranslation::YamlToEvaluableNode(evaluableNodeManager, string_value), true); + } + else //need to parse the string + { + const auto &from_type_str = string_intern_pool.GetStringFromID(from_type); + + //see if it starts with the date string + if(from_type_str.compare(0, date_string.size(), date_string) == 0) + { + std::string locale; + std::string timezone; + if(EvaluableNode::IsAssociativeArray(from_params)) + { + auto &mcn = from_params->GetMappedChildNodesReference(); + + auto found_locale = mcn.find(ENBISI_locale); + if(found_locale != end(mcn)) + locale = EvaluableNode::ToString(found_locale->second); + + auto found_timezone = mcn.find(ENBISI_timezone); + if(found_timezone != end(mcn)) + timezone = EvaluableNode::ToString(found_timezone->second); + } + + use_number = true; + number_value = GetNumSecondsSinceEpochFromDateTimeString(string_value, from_type_str.c_str() + date_string.size(), locale, timezone); + } + } + } + + //have everything from from_type, so no longer need the reference + if(node_stack_needs_popping) + node_stack.PopEvaluableNode(); + string_intern_pool.DestroyStringReference(from_type); + evaluableNodeManager->FreeNodeTreeIfPossible(from_params); + + EvaluableNodeReference to_params; + if(ocn.size() > 4) + to_params = InterpretNodeForImmediateUse(ocn[4]); + + //convert + if(to_type == GetStringIdFromNodeTypeFromString(ENT_NUMBER)) + { + //don't need to do anything if use_number + if(use_uint_number) + number_value = static_cast(uint_number_value); + else if(use_int_number) + number_value = static_cast(int_number_value); + else if(use_string) + { + auto [converted_value, success] = Platform_StringToNumber(string_value); + if(success) + number_value = converted_value; + } + else if(use_code) + number_value = EvaluableNode::ToNumber(code_value); + + string_intern_pool.DestroyStringReference(to_type); + evaluableNodeManager->FreeNodeTreeIfPossible(to_params); + + //didn't return code_value, so can free it + evaluableNodeManager->FreeNodeTreeIfPossible(code_value); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(number_value), true); + } + else if(to_type == ENBISI_code) + { + 
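+		//hand the evaluable-node form back directly: whatever the from_type stage stored in code_value
+		// is returned as-is here, so only the to_type string reference and any conversion params need
+		// to be released first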
string_intern_pool.DestroyStringReference(to_type); + evaluableNodeManager->FreeNodeTreeIfPossible(to_params); + return code_value; + } + else if(to_type == GetStringIdFromNodeTypeFromString(ENT_STRING)) + { + //don't need to do anything if use_string + if(use_number) + string_value = EvaluableNode::NumberToString(number_value); + else if(use_uint_number) + string_value = EvaluableNode::NumberToString(static_cast(uint_number_value)); + else if(use_int_number) + string_value = EvaluableNode::NumberToString(static_cast(int_number_value)); + else if(use_code) + { + bool sort_keys = false; + if(EvaluableNode::IsAssociativeArray(to_params)) + { + auto &mcn = to_params->GetMappedChildNodesReference(); + + auto found_sort_keys = mcn.find(ENBISI_sort_keys); + if(found_sort_keys != end(mcn)) + sort_keys = EvaluableNode::IsTrue(found_sort_keys->second); + } + + string_value = Parser::Unparse(code_value, evaluableNodeManager, false, true, sort_keys); + } + } + else if(to_type == ENBISI_Base16 || to_type == ENBISI_Base64) + { + if(use_number) + { + string_value = StringManipulation::To8ByteStringLittleEndian(number_value); + } + else if(use_int_number) + { + if(int_number_value >= std::numeric_limits::min() + && int_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To1ByteString(static_cast(int_number_value)); + else if(int_number_value >= std::numeric_limits::min() + && int_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(int_number_value)); + else if(int_number_value >= std::numeric_limits::min() + && int_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(int_number_value)); + else + string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(int_number_value)); + } + else if(use_uint_number) + { + if(uint_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To1ByteString(static_cast(uint_number_value)); + else if(uint_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(uint_number_value)); + else if(uint_number_value <= std::numeric_limits::max()) + string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(uint_number_value)); + else + string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(uint_number_value)); + } + //else use_string or use_code + + //if using code, just reuse string value + if(use_code) + string_value = Parser::Unparse(code_value, evaluableNodeManager, false); + + if(to_type == ENBISI_Base16) + string_value = StringManipulation::BinaryStringToBase16(string_value); + else //Base64 + string_value = StringManipulation::BinaryStringToBase64(string_value); + } + else if(to_type == ENBISI_uint8 || to_type == ENBISI_UINT8) + { + if(use_number) string_value = StringManipulation::To1ByteString(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To1ByteString(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To1ByteString(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To1ByteString(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_int8 || to_type == ENBISI_INT8) + { + if(use_number) string_value = StringManipulation::To1ByteString(static_cast(number_value)); + else if(use_uint_number) string_value = 
StringManipulation::To1ByteString(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To1ByteString(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To1ByteString(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_uint16) + { + if(use_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_UINT16) + { + if(use_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_int16) + { + if(use_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To2ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_INT16) + { + if(use_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To2ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_uint32) + { + if(use_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_UINT32) + { + if(use_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_int32) + { + if(use_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(number_value)); + else 
if(use_uint_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_INT32) + { + if(use_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_uint64) + { + if(use_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_UINT64) + { + if(use_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + + } + else if(to_type == ENBISI_int64) + { + if(use_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_INT64) + { + if(use_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_float) + { + if(use_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_FLOAT) + { + if(use_number) string_value = 
StringManipulation::To4ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To4ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_double) + { + if(use_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringLittleEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_DOUBLE) + { + if(use_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(number_value)); + else if(use_uint_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(uint_number_value)); + else if(use_int_number) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(int_number_value)); + else if(use_code) string_value = StringManipulation::To8ByteStringBigEndian(static_cast(EvaluableNode::ToNumber(code_value))); + } + else if(to_type == ENBISI_json) + { + if(use_number) + string_value = EvaluableNode::NumberToString(number_value); + else if(use_uint_number) + string_value = EvaluableNode::NumberToString(static_cast(uint_number_value)); + else if(use_int_number) + string_value = EvaluableNode::NumberToString(static_cast(int_number_value)); + else if(use_string) + { + EvaluableNode en_str(ENT_STRING, string_value); + string_value = EvaluableNodeJSONTranslation::EvaluableNodeToJson(&en_str); + } + else if(use_code) + { + bool sort_keys = false; + if(EvaluableNode::IsAssociativeArray(to_params)) + { + auto &mcn = to_params->GetMappedChildNodesReference(); + + auto found_sort_keys = mcn.find(ENBISI_sort_keys); + if(found_sort_keys != end(mcn)) + sort_keys = EvaluableNode::IsTrue(found_sort_keys->second); + } + + string_value = EvaluableNodeJSONTranslation::EvaluableNodeToJson(code_value, sort_keys); + } + } + else if(to_type == ENBISI_yaml) + { + if(use_number) + { + EvaluableNode value(number_value); + string_value = EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(&value); + } + else if(use_uint_number) + { + EvaluableNode value(static_cast(uint_number_value)); + string_value = EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(&value); + } + else if(use_int_number) + { + EvaluableNode value(static_cast(int_number_value)); + string_value = EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(&value); + } + else if(use_string) + { + EvaluableNode en_str(ENT_STRING, string_value); + string_value = EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(&en_str); + } + else if(use_code) + { + bool sort_keys = false; + if(EvaluableNode::IsAssociativeArray(to_params)) + { + auto &mcn = to_params->GetMappedChildNodesReference(); + + auto found_sort_keys = mcn.find(ENBISI_sort_keys); + if(found_sort_keys != end(mcn)) + sort_keys = EvaluableNode::IsTrue(found_sort_keys->second); + } + + string_value = EvaluableNodeYAMLTranslation::EvaluableNodeToYaml(code_value, sort_keys); + } + } + else //need to parse the string + { + const auto &to_type_str = 
string_intern_pool.GetStringFromID(to_type); + + //if it starts with the date string + if(to_type_str.compare(0, date_string.size(), date_string) == 0) + { + std::string locale; + std::string timezone; + if(EvaluableNode::IsAssociativeArray(to_params)) + { + auto &mcn = to_params->GetMappedChildNodesReference(); + + auto found_locale = mcn.find(ENBISI_locale); + if(found_locale != end(mcn)) + locale = EvaluableNode::ToString(found_locale->second); + + auto found_timezone = mcn.find(ENBISI_timezone); + if(found_timezone != end(mcn)) + timezone = EvaluableNode::ToString(found_timezone->second); + } + + double num_secs_from_epoch = 0.0; + if(use_number) num_secs_from_epoch = number_value; + else if(use_uint_number) num_secs_from_epoch = static_cast(uint_number_value); + else if(use_int_number) num_secs_from_epoch = static_cast(int_number_value); + else if(use_code) num_secs_from_epoch = static_cast(EvaluableNode::ToNumber(code_value)); + + string_value = GetDateTimeStringFromNumSecondsSinceEpoch(num_secs_from_epoch, to_type_str.c_str() + date_string.size(), locale, timezone); + } + } + + string_intern_pool.DestroyStringReference(to_type); + evaluableNodeManager->FreeNodeTreeIfPossible(to_params); + + //didn't return code_value, so can free it + evaluableNodeManager->FreeNodeTreeIfPossible(code_value); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_LABELS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference n = InterpretNodeForImmediateUse(ocn[0]); + if(n == nullptr) + return EvaluableNodeReference::Null(); + + size_t num_labels = n->GetNumLabels(); + + //make list of labels + EvaluableNodeReference result(evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, num_labels), true); + auto &result_ocn = result->GetOrderedChildNodes(); + + //because labels can be stored in different ways, it is just easiest to iterate + // rather than to get a reference to each string id + for(size_t i = 0; i < num_labels; i++) + result_ocn[i]->SetStringID(n->GetLabelStringId(i)); + + evaluableNodeManager->FreeNodeTreeIfPossible(n); + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_ALL_LABELS(EvaluableNode *en) +{ + EvaluableNodeReference n = EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() > 0) + n = InterpretNodeForImmediateUse(ocn[0]); + + EvaluableNodeReference result(evaluableNodeManager->AllocNode(ENT_ASSOC), n.unique); + + auto label_sids_to_nodes = EvaluableNodeTreeManipulation::RetrieveLabelIndexesFromTree(n.reference); + + string_intern_pool.CreateStringReferences(label_sids_to_nodes, [](auto it) { return it.first; }); + result->ReserveMappedChildNodes(label_sids_to_nodes.size()); + for(auto &[node_id, node] : label_sids_to_nodes) + result->SetMappedChildNodeWithReferenceHandoff(node_id, node); + + //can't guarantee there weren't any cycles if more than one label + if(label_sids_to_nodes.size() > 1) + result->SetNeedCycleCheck(true); + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_LABELS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto source = InterpretNode(ocn[0]); + if(source == nullptr) + source = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_NULL), true); + + 
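+	//like the other set_* opcodes, this follows a copy-on-write pattern: when the interpreted target
+	// is not uniquely owned, a copy is allocated below before any mutation so that node trees shared
+	// elsewhere are never modified in place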
if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the labels + auto labels_node = InterpretNodeForImmediateUse(ocn[1]); + if(labels_node != nullptr && labels_node->GetType() != ENT_LIST) + { + evaluableNodeManager->FreeNodeTreeIfPossible(labels_node); + return source; + } + + source->ClearLabels(); + + //if adding labels, then grab from the provided list + if(labels_node != nullptr) + { + for(auto &e : labels_node->GetOrderedChildNodes()) + { + if(e != nullptr) + { + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDWithReference(e); + source->AppendLabelStringId(label_sid, true); + } + } + } + evaluableNodeManager->FreeNodeTreeIfPossible(labels_node); + + return source; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ZIP_LABELS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto label_list = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(label_list); + + auto source = InterpretNode(ocn[1]); + + //if no label list, or no source or source is immediate, then just return the source + if(label_list == nullptr || !label_list->IsOrderedArray() + || source == nullptr || !source->IsOrderedArray()) + return source; + + node_stack.PopEvaluableNode(); + + //make copy to populate with copies of the child nodes + //start assuming that the copy will be unique, but set to not unique if any chance the assumption + // might not hold + EvaluableNodeReference retval = source; + if(!source.unique) + retval = EvaluableNodeReference(evaluableNodeManager->AllocNode(source), true); + + auto &label_list_ocn = label_list->GetOrderedChildNodesReference(); + + //copy over labels, but keep track if all are unique + auto &retval_ocn = retval->GetOrderedChildNodesReference(); + for(size_t i = 0; i < retval_ocn.size(); i++) + { + //no more labels to add, so just reuse the existing nodes + if(i >= label_list_ocn.size()) + { + retval.unique = false; + break; + } + + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDWithReference(label_list_ocn[i]); + + EvaluableNode *cur_value = retval_ocn[i]; + if(!source.unique || cur_value == nullptr) + { + //make a copy of the node to set the label on + if(cur_value == nullptr) + { + cur_value = evaluableNodeManager->AllocNode(ENT_NULL); + } + else + { + cur_value = evaluableNodeManager->AllocNode(cur_value); + + //if the node has child nodes, then can't guarantee uniqueness + if(cur_value->GetNumChildNodes() > 0) + retval.unique = false; + } + + retval_ocn[i] = cur_value; + } + + //if cur_value has appeared before as a child node, then it will have at least one other label + //if it has a label, it could have been a previous child node, therefore can't guarantee uniqueness + if(cur_value->GetNumLabels() > 0) + retval.unique = false; + + cur_value->AppendLabelStringId(label_sid, true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(label_list); + + //if all child nodes are unique, then it doesn't need a cycle check + if(retval.unique) + retval->SetNeedCycleCheck(false); + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_COMMENTS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto n = InterpretNodeForImmediateUse(ocn[0]); + if(n == nullptr) + return EvaluableNodeReference::Null(); + + 
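+	//only the comment metadata is needed from the operand: read its comment string id, free the
+	// interpreted node tree, and return null when the node carries no comments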
StringInternPool::StringID comments_sid = n->GetCommentsStringId(); + evaluableNodeManager->FreeNodeTreeIfPossible(n); + + if(comments_sid == StringInternPool::NOT_A_STRING_ID) + return EvaluableNodeReference::Null(); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, comments_sid), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_COMMENTS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto source = InterpretNode(ocn[0]); + if(source == nullptr) + source = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_NULL), true); + if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the comments + StringInternPool::StringID new_comments_sid = InterpretNodeIntoStringIDValueWithReference(ocn[1]); + source->SetCommentsStringId(new_comments_sid, true); + + return source; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_CONCURRENCY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + auto n = InterpretNodeForImmediateUse(ocn[0]); + if(n == nullptr) + return EvaluableNodeReference::Null(); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(n->GetConcurrency() ? ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_CONCURRENCY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto source = InterpretNode(ocn[0]); + if(source == nullptr) + source = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_NULL), true); + else if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the concurrent flag + bool concurrency = InterpretNodeIntoBoolValue(ocn[1]); + source->SetConcurrency(concurrency); + + return source; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_VALUE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + auto n = InterpretNode(ocn[0]); + if(n == nullptr) + return EvaluableNodeReference::Null(); + + if(n.unique) + { + n->ClearMetadata(); + } + else + { + n.reference = evaluableNodeManager->AllocNode(n, EvaluableNodeManager::ENMM_REMOVE_ALL); + if(n->GetNumChildNodes() == 0) + n.unique = true; + } + + return n; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_VALUE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto source = InterpretNode(ocn[0]); + if(source == nullptr) + source = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_NULL), true); + if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the new value + auto value_node = InterpretNode(ocn[1]); + source->CopyValueFrom(value_node); + source.UpdatePropertiesBasedOnAttachedNode(value_node); + + return source; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_EXPLODE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto [valid, str] = InterpretNodeIntoStringValue(ocn[0]); + 
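+	//the default stride of 0 explodes the string into variable-width UTF-8 characters, while a
+	// positive stride chops it into fixed-size byte chunks (e.g., a stride of 2 over "abcdef"
+	// yields "ab", "cd", "ef"), as handled by the two branches below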
if(!valid) + return EvaluableNodeReference::Null(); + + EvaluableNode *result = evaluableNodeManager->AllocNode(ENT_LIST); + auto node_stack = CreateInterpreterNodeStackStateSaver(result); + + //a stride of 0 means use variable width utf-8 + size_t stride = 0; + if(ocn.size() > 1) + { + double raw_stride = InterpretNodeIntoNumberValue(ocn[1]); + if(raw_stride > 0) + stride = static_cast(raw_stride); + } + + if(stride == 0) + { + //pessimistically reserve enough space assuming worst case of each byte being its own character + result->ReserveOrderedChildNodes(str.size()); + + size_t utf8_char_start_offset = 0; + while(utf8_char_start_offset < str.size()) + { + size_t utf8_char_length = StringManipulation::GetUTF8CharacterLength(str, utf8_char_start_offset); + //done if no more characters + if(utf8_char_length == 0) + break; + + //create a new node for each character in the string + result->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, str.substr(utf8_char_start_offset, utf8_char_length))); + + utf8_char_start_offset += utf8_char_length; + } + } + else //nonzero stride + { + //reserve enough space, and round up for any remainder + result->ReserveOrderedChildNodes((str.size() + (stride - 1)) / stride); + + while(str.size() >= stride) + { + std::string substr(begin(str), begin(str) + stride); + result->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, substr)); + + str.erase(0, stride); + } + + //some left over, but less than stride, so just append + if(str.size() > 0) + result->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, str)); + + } + + return EvaluableNodeReference(result, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SPLIT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true); + auto node_stack = CreateInterpreterNodeStackStateSaver(retval); + + //if only one element, nothing to split on, just return the string in a list + if(ocn.size() == 1) + { + EvaluableNode *str_node = InterpretNodeIntoUniqueStringIDValueEvaluableNode(ocn[0]); + retval->AppendOrderedChildNode(str_node); + return retval; + } + + //have at least two parameters + auto [valid_string_to_split, string_to_split] = InterpretNodeIntoStringValue(ocn[0]); + if(!valid_string_to_split) + { + retval->SetType(ENT_STRING, evaluableNodeManager); + retval->SetStringID(string_intern_pool.NOT_A_STRING_ID); + return retval; + } + + auto [valid_split_value, split_value] = InterpretNodeIntoStringValue(ocn[1]); + if(!valid_split_value) + { + retval->SetType(ENT_STRING, evaluableNodeManager); + retval->SetStringID(string_intern_pool.NOT_A_STRING_ID); + return retval; + } + + double max_split_count = std::numeric_limits::infinity(); + if(ocn.size() >= 3) + { + //only use the value if it's greater than zero + double max_split_count_value = InterpretNodeIntoNumberValue(ocn[2]); + if(max_split_count_value > 0) + max_split_count = max_split_count_value; + } + + //a stride of 0 means use variable width utf-8 + size_t stride = 0; + if(ocn.size() >= 4) + { + double raw_stride = InterpretNodeIntoNumberValue(ocn[3]); + if(raw_stride > 0) + stride = static_cast(raw_stride); + } + + //if stride is 0, then use regex + if(stride == 0) + { + //use nosubs to prevent unnecessary memory allocations since this is just matching + std::regex rx; + try { + rx.assign(split_value, std::regex::ECMAScript | std::regex::nosubs); + } + 
catch(...) + { + return retval; + } + + //-1 argument indicates splitting rather than matching + std::sregex_token_iterator iter(begin(string_to_split), end(string_to_split), rx, -1); + std::sregex_token_iterator rx_end; + + //split the string + size_t num_split = 0; + for(; iter != rx_end && num_split < max_split_count; ++iter, num_split++) + { + std::string value = *iter; + retval->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, value)); + } + + //ran out of split count, need to include the last bit + if(num_split == max_split_count && iter != rx_end) + { + //determine offset of the beginning of the leftover part of the string not matched + //do this separately because it's nontrivial to get types to match + auto pos = (*iter).first - begin(string_to_split); + std::string value(begin(string_to_split) + pos, end(string_to_split)); + retval->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, value)); + } + } + else //not regex + { + size_t cur_segment_start = 0; + size_t cur_segment_end = 0; + size_t string_to_split_len = string_to_split.length(); + size_t split_value_len = split_value.length(); + + while(cur_segment_end < string_to_split_len && max_split_count > 0) + { + size_t cur_match_position = cur_segment_end; + size_t cur_split_position = 0; + + //advance forward through the split string + while(cur_split_position < split_value_len + && string_to_split[cur_match_position] == split_value[cur_split_position]) + { + cur_match_position += stride; + cur_split_position += stride; + } + + //if found the string + if(cur_split_position >= split_value_len) + { + std::string value(begin(string_to_split) + cur_segment_start, + begin(string_to_split) + cur_match_position - cur_split_position); + retval->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, value)); + + cur_segment_end = cur_match_position; + cur_segment_start = cur_match_position; + + //if infinite, won't count against + max_split_count -= 1; + } + else //didn't find the string, move forward one character + { + cur_segment_end += stride; + } + } + + //attach last segment if it exists + if(cur_segment_start < string_to_split_len) + retval->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, + std::string(begin(string_to_split) + cur_segment_start, end(string_to_split)))); + } + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SUBSTR(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //if only string as the parameter, just return a new copy of the string + if(ocn.size() == 1) + { + return EvaluableNodeReference(evaluableNodeManager->AllocNodeWithReferenceHandoff(ENT_STRING, + EvaluableNode::ToStringIDWithReference(ocn[0])), true); + } + + //have at least 2 params + auto [valid_string_to_substr, string_to_substr] = InterpretNodeIntoStringValue(ocn[0]); + if(!valid_string_to_substr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_intern_pool.NOT_A_STRING_ID), true); + + bool replace_string = false; + std::string replacement_string; + if(ocn.size() >= 4 && !EvaluableNode::IsNull(ocn[3])) + { + replace_string = true; + auto [valid_replacement_string, temp_replacement_string] = InterpretNodeIntoStringValue(ocn[3]); + //because otherwise previous line becomes clunky + std::swap(replacement_string, temp_replacement_string); + + if(!valid_replacement_string) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, 
string_intern_pool.NOT_A_STRING_ID), true); + } + + EvaluableNodeReference substr_node = InterpretNodeForImmediateUse(ocn[1]); + if(EvaluableNode::IsNull(substr_node)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(substr_node); + return EvaluableNodeReference::Null(); + } + + //if a number, then go by offset + if(substr_node->IsNativelyNumeric()) + { + double start_offset_raw = EvaluableNode::ToNumber(substr_node); + evaluableNodeManager->FreeNodeTreeIfPossible(substr_node); + + double length_raw = static_cast(string_to_substr.size()); + if(ocn.size() >= 3) + length_raw = InterpretNodeIntoNumberValue(ocn[2]); + + //a stride of 0 means use variable width utf-8 + size_t stride = 0; + if(ocn.size() >= 5) + { + double raw_stride = InterpretNodeIntoNumberValue(ocn[4]); + if(raw_stride > 0) + stride = static_cast(raw_stride); + } + + //get start of substring + size_t start_offset = 0; + if(start_offset_raw >= 0) + { + if(stride == 0) + start_offset = StringManipulation::GetNthUTF8CharacterOffset(string_to_substr, static_cast(start_offset_raw)); + else + start_offset = stride * static_cast(start_offset_raw); + } + else if(start_offset_raw < 0) + { + if(stride == 0) + start_offset = StringManipulation::GetNthLastUTF8CharacterOffset(string_to_substr, static_cast(-start_offset_raw)); + else + { + size_t backward_offset = stride * static_cast(-start_offset_raw); + if(backward_offset < string_to_substr.size()) + start_offset = (string_to_substr.size() - backward_offset); + } + } + //if failed both ifs then must be nan, so leave default + + //get end of substring + size_t end_offset = string_to_substr.size(); + //only need to do end processing if have a value smaller than the length + if(length_raw < end_offset) + { + if(length_raw >= 0) + { + if(stride == 0) + end_offset = StringManipulation::GetNthUTF8CharacterOffset(std::string_view(&string_to_substr[start_offset]), static_cast(length_raw)); + else + end_offset = start_offset + stride * static_cast(length_raw); + } + else if(length_raw < 0) + { + if(stride == 0) + { + end_offset = start_offset + StringManipulation::GetNthLastUTF8CharacterOffset(std::string_view(&string_to_substr[start_offset]), + static_cast(-length_raw)); + } + else + { + size_t backward_offset = stride * static_cast(-length_raw); + if(backward_offset < string_to_substr.size()) + end_offset = (string_to_substr.size() - backward_offset); + } + } + //if failed both ifs then must be nan, so leave default + } + + if(replace_string) + { + std::string rebuilt_string; + if(start_offset < string_to_substr.size()) + rebuilt_string += string_to_substr.substr(0, start_offset); + + rebuilt_string += replacement_string; + if(end_offset < string_to_substr.size()) + rebuilt_string += string_to_substr.substr(end_offset); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, rebuilt_string), true); + } + else //return just the substring + { + std::string substr; + if(start_offset < string_to_substr.size() && end_offset > start_offset) + substr = string_to_substr.substr(start_offset, end_offset - start_offset); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, substr), true); + } + } + else if(substr_node->GetType() == ENT_STRING) + { + //make a copy of the string so the node can be freed + //(if this is a performance cost found in profiling, it can be fixed with more logic) + std::string regex_str = substr_node->GetStringValue(); + evaluableNodeManager->FreeNodeTreeIfPossible(substr_node); + + if(replace_string) + { + double max_match_count = 
std::numeric_limits::infinity(); + if(ocn.size() >= 3) + { + //only use the value if it's greater than zero + double max_match_count_value = InterpretNodeIntoNumberValue(ocn[2]); + if(max_match_count_value > 0) + max_match_count = max_match_count_value; + } + + std::regex rx; + try { + rx.assign(regex_str, std::regex::ECMAScript); + } + catch(...) + { + //bad regex, so nothing was replaced, just return original + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_to_substr), true); + } + + std::string updated_string; + if(max_match_count == std::numeric_limits::infinity()) + { + updated_string = std::regex_replace(string_to_substr, rx, replacement_string); + } + else //need to count matches + { + auto out = std::back_inserter(updated_string); + auto iter = std::sregex_iterator(begin(string_to_substr), end(string_to_substr), rx); + auto end = std::sregex_iterator(); + auto last_iter = iter; + + for(size_t n = static_cast(max_match_count); n > 0 && iter != end; ++iter, n--) + { + //copy out the replacement + out = std::copy(iter->prefix().first, iter->prefix().second, out); + out = iter->format(out, replacement_string); + last_iter = iter; + } + + //reset out to the full string + out = std::copy(last_iter->suffix().first, last_iter->suffix().second, out); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, updated_string), true); + } + else //finding matches + { + EvaluableNodeReference param_node; + if(ocn.size() >= 3) + param_node = InterpretNodeForImmediateUse(ocn[2]); + + //these three options are mutually exclusive + //if true, returns first full match as a string + bool first_match_only = true; + //if true, returns full matches up to match_count + bool full_matches = false; + //if true, returns all submatches up to match_count + bool submatches = false; + //maximum number of matches allowed + double max_match_count = std::numeric_limits::infinity(); + + if(!EvaluableNode::IsNull(param_node)) + { + if(param_node->GetType() == ENT_STRING) + { + auto pnsid = param_node->GetStringIDReference(); + if(pnsid == ENBISI_all) + { + first_match_only = false; + full_matches = true; + } + else if(pnsid == ENBISI_submatches) + { + first_match_only = false; + submatches = true; + } + } + else + { + double param_num = EvaluableNode::ToNumber(param_node); + if(param_num >= 0) + { + first_match_only = false; + full_matches = true; + max_match_count = param_num; + } + else if(param_num < 0) + { + first_match_only = false; + submatches = true; + max_match_count = -param_num; + } + //else NaN -- leave defaults + + } + + evaluableNodeManager->FreeNodeTreeIfPossible(param_node); + } + + if(first_match_only) + { + //find first match, don't need submatches + std::regex rx; + try { + rx.assign(regex_str, std::regex::ECMAScript | std::regex::nosubs); + } + catch(...) 
+ { + //bad regex, return same as not found + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_intern_pool.NOT_A_STRING_ID), true); + } + + std::sregex_token_iterator iter(begin(string_to_substr), end(string_to_substr), rx); + std::sregex_token_iterator rx_end; + if(iter == rx_end) + { + //not found + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_intern_pool.NOT_A_STRING_ID), true); + } + else + { + std::string value = *iter; + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, value), true); + } + } + else if(full_matches) + { + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true); + + //find all the matches, don't need submatches + std::regex rx; + try { + rx.assign(regex_str, std::regex::ECMAScript | std::regex::nosubs); + } + catch(...) + { + return retval; + } + + size_t num_split = 0; + std::sregex_token_iterator iter(begin(string_to_substr), end(string_to_substr), rx); + std::sregex_token_iterator rx_end; + for(; iter != rx_end && num_split < max_match_count; ++iter, num_split++) + { + std::string value = *iter; + retval->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, value)); + } + + return retval; + } + else if (submatches) + { + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_LIST), true); + + std::regex rx; + try { + rx.assign(regex_str, std::regex::ECMAScript); + } + catch(...) + { + return retval; + } + + std::sregex_iterator iter(begin(string_to_substr), end(string_to_substr), rx); + std::sregex_iterator rx_end; + + //find all the matches + size_t num_split = 0; + for(; iter != rx_end && num_split < max_match_count; ++iter, num_split++) + { + EvaluableNode *cur_match_elements = evaluableNodeManager->AllocNode(ENT_LIST); + retval->AppendOrderedChildNode(cur_match_elements); + + for(std::string s : *iter) + cur_match_elements->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, s)); + } + + return retval; + } + else //not a valid match state + { + return EvaluableNodeReference::Null(); + } + } + } + else //not a valid substr + { + return EvaluableNodeReference::Null(); + } +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONCAT(EvaluableNode *en) +{ + //build string from all child nodes + auto &ocn = en->GetOrderedChildNodes(); + + //if only one parameter is specified, do a fast shortcut + if(ocn.size() == 1) + return EvaluableNodeReference(InterpretNodeIntoUniqueStringIDValueEvaluableNode(ocn[0]), true); + + std::string s; + for(auto &cn : ocn) + { + auto [valid, cur_string] = InterpretNodeIntoStringValue(cn); + if(!valid) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, string_intern_pool.NOT_A_STRING_ID), true); + + //want to exit early if out of resources because + // this opcode can chew through memory with string concatenation via returned nulls + if(AreExecutionResourcesExhausted()) + return EvaluableNodeReference::Null(); + + //since UTF-8, don't need to do any conversions to concatenate + s += cur_string; + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CRYPTO_SIGN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + std::string message = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + std::string secret_key = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + + std::string signature = 
SignMessage(message, secret_key); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, signature), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CRYPTO_SIGN_VERIFY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() < 3) + return EvaluableNodeReference::Null(); + + std::string message = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + std::string public_key = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + std::string signature = InterpretNodeIntoStringValueEmptyNull(ocn[2]); + + bool valid_sig = IsSignatureValid(message, public_key, signature); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(valid_sig ? ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ENCRYPT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + std::string plaintext = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + std::string key_1 = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + + std::string nonce = ""; + if(ocn.size() >= 3) + nonce = InterpretNodeIntoStringValueEmptyNull(ocn[2]); + + std::string key_2 = ""; + if(ocn.size() >= 4) + key_2 = InterpretNodeIntoStringValueEmptyNull(ocn[3]); + + std::string cyphertext = ""; + + //if no second key, then use symmetric key encryption + if(key_2 == "") + cyphertext = EncryptMessage(plaintext, key_1, nonce); + else //use public key encryption + cyphertext = EncryptMessage(plaintext, key_1, key_2, nonce); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, cyphertext), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DECRYPT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + std::string cyphertext = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + std::string key_1 = InterpretNodeIntoStringValueEmptyNull(ocn[1]); + + std::string nonce = ""; + if(ocn.size() >= 3) + nonce = InterpretNodeIntoStringValueEmptyNull(ocn[2]); + + std::string key_2 = ""; + if(ocn.size() >= 4) + key_2 = InterpretNodeIntoStringValueEmptyNull(ocn[3]); + + std::string plaintext = ""; + + //if no second key, then use symmetric key encryption + if(key_2 == "") + plaintext = DecryptMessage(cyphertext, key_1, nonce); + else //use public key encryption + plaintext = DecryptMessage(cyphertext, key_1, key_2, nonce); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, plaintext), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_PRINT(EvaluableNode *en) +{ + for(auto &cn : en->GetOrderedChildNodes()) + { + auto cur = InterpretNodeForImmediateUse(cn); + + std::string s; + if(cur == nullptr) + { + s = "(null)"; + } + else if(IsEvaluableNodeTypeImmediate(cur->GetType())) + { + if(DoesEvaluableNodeTypeUseStringData(cur->GetType())) + s = cur->GetStringValue(); + else if(DoesEvaluableNodeTypeUseNumberData(cur->GetType())) + s = EvaluableNode::NumberToString(cur->GetNumberValue()); + else + s = EvaluableNode::ToString(cur); + } + else + { + s = Parser::Unparse(cur, evaluableNodeManager, true, true, true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + if(writeListeners != nullptr) + { + for(auto &wl : *writeListeners) + wl->LogPrint(s); + } + if(printListener != nullptr) + printListener->LogPrint(s); + } + + if(writeListeners != nullptr) + { + for(auto &wl : *writeListeners) + wl->FlushLogFile(); + } + if(printListener != nullptr) + 
printListener->FlushLogFile(); + + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TOTAL_SIZE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto cur = InterpretNodeForImmediateUse(ocn[0]); + size_t total_size = EvaluableNode::GetDeepSize(cur); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(static_cast(total_size)), true); +} \ No newline at end of file diff --git a/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp b/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp new file mode 100644 index 00000000..1fb768a4 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesEntityAccess.cpp @@ -0,0 +1,643 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryBuilder.h" +#include "EntityQueryManager.h" +#include "EntityWriteListener.h" +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINS_ENTITY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the source entity + auto source_id_node = InterpretNodeForImmediateUse(ocn[0]); + Entity *source_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, source_id_node); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(source_entity != nullptr ? 
1.0 : 0.0), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINED_ENTITIES_and_COMPUTE_ON_CONTAINED_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to work within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + bool return_query_value = (en->GetType() == ENT_COMPUTE_ON_CONTAINED_ENTITIES); + + Entity *source_entity = curEntity; + + //parameters to search entities for + EvaluableNode *query_params = nullptr; + + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() > 0) + { + EvaluableNodeReference first_param = InterpretNodeForImmediateUse(ocn[0]); + + if(first_param != nullptr) + { + if(first_param->GetType() == ENT_LIST && first_param->GetOrderedChildNodes().size() > 0 + && EvaluableNode::IsQuery(first_param->GetOrderedChildNodes()[0])) + { + query_params = first_param; + } + else //first parameter is the id + { + source_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, first_param); + evaluableNodeManager->FreeNodeTreeIfPossible(first_param); + + if(source_entity == nullptr) + return EvaluableNodeReference::Null(); + + if(ocn.size() > 1) + query_params = InterpretNodeForImmediateUse(ocn[1]); + } + } + else if(ocn.size() > 1) //got a nullptr, which means keep source_entity as curEntity + { + query_params = InterpretNodeForImmediateUse(ocn[1]); + } + } + + if(source_entity == nullptr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + + //if no query, just return all contained entities + if(query_params == nullptr || query_params->GetOrderedChildNodes().size() == 0) + { + auto &contained_entities = source_entity->GetContainedEntities(); + + //new list containing the contained entity ids to return + EvaluableNodeReference result( + evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, contained_entities.size()), true); + + auto &result_ocn = result->GetOrderedChildNodes(); + + //create the string references all at once and hand off + string_intern_pool.CreateStringReferences(contained_entities, [](Entity *e) { return e->GetIdStringId(); }); + for(size_t i = 0; i < contained_entities.size(); i++) + result_ocn[i]->SetStringIDWithReferenceHandoff(contained_entities[i]->GetIdStringId()); + + //if not using SBFDS, make sure always return in the same order for consistency, regardless of cashing, hashing, etc. 
+ //if using SBFDS, then the order is assumed to not matter for other queries, so don't pay the cost of sorting here + if(!_enable_SBF_datastore) + std::sort(begin(result->GetOrderedChildNodes()), end(result->GetOrderedChildNodes()), EvaluableNode::IsStrictlyLessThan); + + return result; + } + + //parse ordered child nodes into conditions + conditionsBuffer.clear(); + for(auto &cn : query_params->GetOrderedChildNodes()) + { + if(cn == nullptr) + continue; + + EvaluableNodeType type = cn->GetType(); + switch(type) + { + case ENT_QUERY_WITHIN_GENERALIZED_DISTANCE: + case ENT_QUERY_NEAREST_GENERALIZED_DISTANCE: + case ENT_COMPUTE_ENTITY_CONVICTIONS: + case ENT_COMPUTE_ENTITY_GROUP_KL_DIVERGENCE: + case ENT_COMPUTE_ENTITY_DISTANCE_CONTRIBUTIONS: + case ENT_COMPUTE_ENTITY_KL_DIVERGENCES: + EntityQueryBuilder::BuildDistanceCondition(cn, type, conditionsBuffer); + break; + + default: + EntityQueryBuilder::BuildNonDistanceCondition(cn, type, conditionsBuffer, *evaluableNodeManager, randomStream); + break; + } + } + + //if not a valid query, return nullptr + if(conditionsBuffer.size() == 0) + return EvaluableNodeReference::Null(); + + //perform query + return EntityQueryManager::GetEntitiesMatchingQuery(source_entity, conditionsBuffer, evaluableNodeManager, return_query_value); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_QUERY_and_COMPUTE_opcodes(EvaluableNode *en) +{ + //use stack to lock it in place, but copy it back to temporary before returning + EvaluableNodeReference query_command(evaluableNodeManager->AllocNode(en->GetType()), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(query_command); + + //propagate concurrency + if(en->GetConcurrency()) + query_command->SetConcurrency(true); + + auto &ocn = en->GetOrderedChildNodes(); + query_command->ReserveOrderedChildNodes(ocn.size()); + for(auto &i : ocn) + { + auto value = InterpretNode(i); + //add it to the list + query_command->AppendOrderedChildNode(value); + + query_command.UpdatePropertiesBasedOnAttachedNode(value); + } + + return query_command; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINS_LABEL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to work within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get label to look up + size_t label_param_index = (ocn.size() > 1 ? 1 : 0); + //don't need an extra reference because will be false anyway if the string doesn't exist + StringInternPool::StringID label_sid = InterpretNodeIntoStringIDValueIfExists(ocn[label_param_index]); + if(label_sid == StringInternPool::NOT_A_STRING_ID) + return EvaluableNodeReference::Null(); + + //get the id of the entity + Entity *target_entity = curEntity; + if(ocn.size() > 1) + target_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + + //if no entity, clean up assignment assoc + if(target_entity == nullptr) + return EvaluableNodeReference::Null(); + + //make sure not trying to access a private label + if(target_entity != curEntity && Entity::IsLabelPrivate(label_sid)) + return EvaluableNodeReference::Null(); + + bool contains_label = target_entity->DoesLabelExist(label_sid); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(contains_label ? 
ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSIGN_TO_ENTITIES_and_DIRECT_ASSIGN_TO_ENTITIES_and_ACCUM_TO_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to work within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + + bool all_assignments_successful = true; + for(size_t i = 0; i < ocn.size(); i += 2) + { + //get the id of the entity before the variables to assign + // so don't need to create a node stack + Entity *target_entity = curEntity; + if(i + 1 < ocn.size()) + { + auto source_id_node = InterpretNodeForImmediateUse(ocn[i]); + target_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, source_id_node); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + } + + //if no entity, can't successfully assign + if(target_entity == nullptr) + { + all_assignments_successful = false; + continue; + } + + //get variables to assign + size_t assoc_param_index = (i + 1 < ocn.size() ? i + 1 : i); + auto assigned_vars = InterpretNode(ocn[assoc_param_index]); + + if(assigned_vars == nullptr) + { + all_assignments_successful = false; + continue; + } + + bool direct = (en->GetType() == ENT_DIRECT_ASSIGN_TO_ENTITIES); + bool accum_assignment = (en->GetType() == ENT_ACCUM_TO_ENTITIES); + + size_t num_new_nodes_allocated = 0; + bool any_successful_assignment = false; + + { //use a block so lock can go out of scope as appropriate + #ifdef MULTITHREAD_SUPPORT + auto write_lock = target_entity->CreateEntityLock(); + + bool copy_entity = IsEntitySafeForModification(target_entity); + #else + bool copy_entity = false; + #endif + + auto [any_success, all_success] = target_entity->SetValuesAtLabels( + assigned_vars, accum_assignment, direct, writeListeners, + (AllowUnlimitedExecutionNodes() ? nullptr : &num_new_nodes_allocated), target_entity == curEntity, copy_entity); + + any_successful_assignment = any_success; + } + + if(any_successful_assignment) + { + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += num_new_nodes_allocated; + + target_entity->evaluableNodeManager.AdvanceGarbageCollectionTrigger(); + + //collect garbage, but not on current entity, save that for between instructions + if(target_entity != curEntity) + { + //for deep debugging only + //ValidateEvaluableNodeIntegrity(); + + #ifdef MULTITHREAD_SUPPORT + target_entity->CollectGarbage(&memoryModificationLock); + #else + target_entity->CollectGarbage(); + #endif + + //for deep debugging only + //ValidateEvaluableNodeIntegrity(); + } + } + else + { + all_assignments_successful = false; + } + + //check this at the end of each iteration in case need to exit + if(AreExecutionResourcesExhausted()) + return EvaluableNodeReference::Null(); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(all_assignments_successful ? 
ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_RETRIEVE_FROM_ENTITY_and_DIRECT_RETRIEVE_FROM_ENTITY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to work within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the source to check + Entity *target_entity = curEntity; + if(ocn.size() > 1) + target_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + + if(target_entity == nullptr) + return EvaluableNodeReference::Null(); + + //get lookup reference + size_t lookup_param_index = (ocn.size() > 1 ? 1 : 0); + auto to_lookup = InterpretNode(ocn[lookup_param_index]); + + bool direct = (en->GetType() == ENT_DIRECT_RETRIEVE_FROM_ENTITY); + + //get the value(s) + if(to_lookup == nullptr || IsEvaluableNodeTypeImmediate(to_lookup->GetType())) + { + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDIfExists(to_lookup); + evaluableNodeManager->FreeNodeTreeIfPossible(to_lookup); + + ExecutionCycleCount num_steps_executed = 0; + EvaluableNodeReference value = target_entity->GetValueAtLabel(label_sid, evaluableNodeManager, direct, target_entity == curEntity); + curExecutionStep += num_steps_executed; + + return value; + } + else if(to_lookup->IsAssociativeArray()) + { + //reference to keep track of to_lookup nodes to free + EvaluableNodeReference cnr(nullptr, to_lookup.unique); + + //need to return an assoc, so see if need to make copy; will overwrite all values + if(!to_lookup.unique) + to_lookup = EvaluableNodeReference(evaluableNodeManager->AllocNode(to_lookup), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(to_lookup); + + //overwrite values in the ordered + for(auto &[cn_id, cn] : to_lookup->GetMappedChildNodesReference()) + { + //if there are values passed in, free them to be clobbered + cnr.reference = cn; + evaluableNodeManager->FreeNodeTreeIfPossible(cnr); + + ExecutionCycleCount num_steps_executed = 0; + EvaluableNodeReference value = target_entity->GetValueAtLabel(cn_id, evaluableNodeManager, direct, target_entity == curEntity); + curExecutionStep += num_steps_executed; + + cn = value; + to_lookup.UpdatePropertiesBasedOnAttachedNode(value); + } + + return to_lookup; + } + else //ordered params + { + //reference to keep track of to_lookup nodes to free + EvaluableNodeReference cnr(nullptr, to_lookup.unique); + + //need to return an assoc, so see if need to make copy; will overwrite all values + if(!to_lookup.unique) + to_lookup = EvaluableNodeReference(evaluableNodeManager->AllocNode(to_lookup), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(to_lookup); + + //overwrite values in the ordered + for(auto &cn : to_lookup->GetOrderedChildNodes()) + { + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDIfExists(cn); + + //if there are values passed in, free them to be clobbered + cnr.reference = cn; + evaluableNodeManager->FreeNodeTreeIfPossible(cnr); + + ExecutionCycleCount num_steps_executed = 0; + EvaluableNodeReference value = target_entity->GetValueAtLabel(label_sid, evaluableNodeManager, direct, target_entity == curEntity); + curExecutionStep += num_steps_executed; + + cn = value; + to_lookup.UpdatePropertiesBasedOnAttachedNode(value); + } + + return to_lookup; + } +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CALL_ENTITY_and_CALL_ENTITY_GET_CHANGES(EvaluableNode *en) +{ + auto 
&ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to check within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + Entity *called_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + if(called_entity == nullptr) + return EvaluableNodeReference::Null(); + + StringInternPool::StringID entity_label_sid = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() > 1) + entity_label_sid = InterpretNodeIntoStringIDValueWithReference(ocn[1]); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + performance_profiler.StartOperation(string_intern_pool.GetStringFromID(entity_label_sid), evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //number of execution steps + //evaluate before context so don't need to keep/remove reference for context + ExecutionCycleCount num_steps_allowed = GetRemainingNumExecutionSteps(); + bool num_steps_allowed_specified = false; + if(ocn.size() > 3) + { + num_steps_allowed = static_cast(InterpretNodeIntoNumberValue(ocn[3])); + num_steps_allowed_specified = true; + } + + //number of execution nodes + //evaluate before context so don't need to keep/remove reference for context + size_t num_nodes_allowed = GetRemainingNumExecutionNodes(); + bool num_nodes_allowed_specified = false; + if(ocn.size() > 4) + { + num_nodes_allowed = static_cast(InterpretNodeIntoNumberValue(ocn[4])); + num_nodes_allowed_specified = true; + } + + //attempt to get arguments + EvaluableNodeReference args = EvaluableNodeReference::Null(); + if(ocn.size() > 2) + { + args = InterpretNodeForImmediateUse(ocn[2]); + //since it is going to be called by a different entity, ConvertArgsToCallStack will + // need to make a copy, and the contained entity should not treat args as unique + args.unique = false; + } + + EvaluableNodeManager *called_entity_enm = &called_entity->evaluableNodeManager; + + //if have arguments, use them + EvaluableNodeReference call_stack = ConvertArgsToCallStack(args, called_entity_enm); + auto node_stack = CreateInterpreterNodeStackStateSaver(call_stack); + + //current pointer to write listeners + std::vector *cur_write_listeners = writeListeners; + //another storage container in case getting entity changes + std::vector get_changes_write_listeners; + if(en->GetType() == ENT_CALL_ENTITY_GET_CHANGES) + { + //add on extra listener and set pointer to this buffer + // keep the copying here in this if statement so don't need to make copies when not calling ENT_CALL_ENTITY_GET_CHANGES + if(writeListeners != nullptr) + get_changes_write_listeners = *writeListeners; + get_changes_write_listeners.push_back(new EntityWriteListener(curEntity, true)); + cur_write_listeners = &get_changes_write_listeners; + } + + //compute execution limits + if(AllowUnlimitedExecutionSteps() && (!num_steps_allowed_specified || num_steps_allowed == 0)) + num_steps_allowed = 0; + else + { + //if unlimited steps are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionSteps()) + num_steps_allowed = std::min(num_steps_allowed, GetRemainingNumExecutionSteps()); + } + + if(AllowUnlimitedExecutionNodes() && (!num_nodes_allowed_specified || num_nodes_allowed == 0)) + num_nodes_allowed = 0; + else + { + //if unlimited nodes are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionNodes()) + num_nodes_allowed = std::min(num_nodes_allowed, 
GetRemainingNumExecutionNodes()); + } + + ExecutionCycleCount num_steps_executed = 0; + size_t num_nodes_allocated = 0; + EvaluableNodeReference retval = called_entity->Execute(num_steps_allowed, num_steps_executed, + num_nodes_allowed, num_nodes_allocated, + cur_write_listeners, printListener, call_stack, called_entity == curEntity, evaluableNodeManager, + #ifdef MULTITHREAD_SUPPORT + &memoryModificationLock, + #endif + entity_label_sid, this); + + //accumulate costs of execution + curExecutionStep += num_steps_executed; + curNumExecutionNodesAllocatedToEntities += num_nodes_allocated; + + string_intern_pool.DestroyStringReference(entity_label_sid); + + if(en->GetType() == ENT_CALL_ENTITY_GET_CHANGES) + { + EntityWriteListener *wl = get_changes_write_listeners.back(); + EvaluableNode *writes = wl->GetWrites(); + + EvaluableNode *list = evaluableNodeManager->AllocNode(ENT_LIST); + //copy the data out of the write listener + list->AppendOrderedChildNode(evaluableNodeManager->DeepAllocCopy(retval)); + list->AppendOrderedChildNode(evaluableNodeManager->DeepAllocCopy(writes)); + + //delete the write listener and all of its memory + delete wl; + + retval.reference = list; + retval.SetNeedCycleCheck(true); //can't count on that due to things written in the write listener + } + + //ConvertArgsToCallStack always adds an outer list that is safe to free using called_entity_enm + called_entity_enm->FreeNode(call_stack); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CALL_CONTAINER(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a containing Entity to call + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + Entity *container = curEntity->GetContainer(); + if(container == nullptr) + return EvaluableNodeReference::Null(); + + std::string orig_container_label_name = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + if(orig_container_label_name == "") + return EvaluableNodeReference::Null(); + + std::string container_label_name; + container_label_name.reserve(orig_container_label_name.size() + 2); + container_label_name = '^' + orig_container_label_name; + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + performance_profiler.StartOperation(container_label_name, evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + //number of execution steps + //evaluate before context so don't need to keep/remove reference for context + ExecutionCycleCount num_steps_allowed = GetRemainingNumExecutionSteps(); + bool num_steps_allowed_specified = false; + if(ocn.size() > 2) + { + num_steps_allowed = static_cast(InterpretNodeIntoNumberValue(ocn[2])); + num_steps_allowed_specified = true; + } + + //number of execution nodes + //evaluate before context so don't need to keep/remove reference for context + size_t num_nodes_allowed = GetRemainingNumExecutionNodes(); + bool num_nodes_allowed_specified = false; + if(ocn.size() > 3) + { + num_nodes_allowed = static_cast(InterpretNodeIntoNumberValue(ocn[3])); + num_nodes_allowed_specified = true; + } + + //use the container's EvaluableNodeManager to make sure that an outer entity + // does not free a node that an inner entity is using, which can occur when the inner + // entity is calling its container and the container frees the node + EvaluableNodeManager *container_enm = 
&container->evaluableNodeManager; + + //attempt to get arguments + EvaluableNodeReference args = EvaluableNodeReference::Null(); + if(ocn.size() > 1) + { + args = InterpretNodeForImmediateUse(ocn[1]); + args = container_enm->DeepAllocCopy(args); + } + + //need to create arguments regardless + EvaluableNodeReference call_stack = ConvertArgsToCallStack(args, container_enm); + + auto node_stack = CreateInterpreterNodeStackStateSaver(call_stack); + + //add accessing_entity to arguments. If accessing_entity already specified (it shouldn't be), let garbage collection clean it up + args->SetMappedChildNode(ENBISI_accessing_entity, container_enm->AllocNode(ENT_STRING, curEntity->GetIdStringId())); + + //compute execution limits + if(AllowUnlimitedExecutionSteps() && (!num_steps_allowed_specified || num_steps_allowed == 0)) + num_steps_allowed = 0; + else + { + //if unlimited steps are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionSteps()) + num_steps_allowed = std::min(num_steps_allowed, GetRemainingNumExecutionSteps()); + } + + if(AllowUnlimitedExecutionNodes() && (!num_nodes_allowed_specified || num_nodes_allowed == 0)) + num_nodes_allowed = 0; + else + { + //if unlimited nodes are allowed, then leave the value as specified, otherwise clamp to what is remaining + if(!AllowUnlimitedExecutionNodes()) + num_nodes_allowed = std::min(num_nodes_allowed, GetRemainingNumExecutionNodes()); + } + + ExecutionCycleCount num_steps_executed = 0; + size_t num_nodes_allocated = 0; + EvaluableNodeReference retval = container->Execute(num_steps_allowed, num_steps_executed, num_nodes_allowed, num_nodes_allocated, + writeListeners, printListener, call_stack, false, evaluableNodeManager, + #ifdef MULTITHREAD_SUPPORT + &memoryModificationLock, + #endif + container_label_name, this); + + //accumulate costs of execution + curExecutionStep += num_steps_executed; + curNumExecutionNodesAllocatedToEntities += num_nodes_allocated; + + //ConvertArgsToCallStack always adds an outer list that is safe to free + evaluableNodeManager->FreeNode(call_stack); + +#ifdef INTERPRETER_PROFILE_LABELS_CALLED + performance_profiler.EndOperation(evaluableNodeManager->GetNumberOfUsedNodes()); +#endif + + return retval; +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp b/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp new file mode 100644 index 00000000..1d4c05df --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesEntityControl.cpp @@ -0,0 +1,774 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeDifference.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_ENTITY_COMMENTS(EvaluableNode *en) +{ + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + Entity *target_entity = curEntity; + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() > 0) + target_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + + if(target_entity == nullptr) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + //TODO 10975: move this into the entity access above + auto read_lock = 
target_entity->CreateEntityLock(); +#endif + + StringInternPool::StringID label_sid = StringInternPool::NOT_A_STRING_ID; + if(ocn.size() > 1) + label_sid = InterpretNodeIntoStringIDValueIfExists(ocn[1]); + + bool deep_comments = false; + if(ocn.size() > 2) + deep_comments = InterpretNodeIntoBoolValue(ocn[2]); + + if(label_sid == StringInternPool::NOT_A_STRING_ID) + { + if(!deep_comments) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, EvaluableNode::GetCommentsStringId(target_entity->GetRoot())), true); + + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + + //collect comments of each label + target_entity->IterateFunctionOverLabels( + [this, &retval] + (StringInternPool::StringID label_sid, EvaluableNode *node) + { + //don't include those that are only inwardly facing + if(!Entity::IsLabelAccessibleToContainedEntities(label_sid)) + retval->SetMappedChildNode(label_sid, evaluableNodeManager->AllocNode(ENT_STRING, EvaluableNode::GetCommentsStringId(node))); + } + ); + + return retval; + } + + auto label_value = target_entity->GetValueAtLabel(label_sid, nullptr, true); + + //has valid label + if(!deep_comments) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, label_value->GetCommentsStringId()), true); + + //make sure a function based on declare that has parameters + if(label_value == nullptr || label_value->GetType() != ENT_DECLARE || label_value->GetOrderedChildNodes().size() < 1) + return EvaluableNodeReference::Null(); + + //deep_comments of label, so get the parameters and their respective labels + EvaluableNodeReference retval(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + + EvaluableNode *vars = label_value->GetOrderedChildNodes()[0]; + + //if the vars are already initialized, then pull the comments from their values + if(EvaluableNode::IsAssociativeArray(vars)) + { + auto &mcn = vars->GetMappedChildNodesReference(); + retval->ReserveMappedChildNodes(mcn.size()); + + //create the string references all at once and hand off + string_intern_pool.CreateStringReferences(mcn, [](auto it) { return it.first; }); + for(auto &[cn_id, cn] : mcn) + retval->SetMappedChildNodeWithReferenceHandoff(cn_id, evaluableNodeManager->AllocNode(ENT_STRING, EvaluableNode::GetCommentsStringId(cn))); + + return retval; + } + + //the vars are not initialized, which means the comments are on the parameters + retval->ReserveMappedChildNodes(vars->GetOrderedChildNodes().size() / 2); + for(size_t index = 0; index < vars->GetOrderedChildNodes().size(); index += 2) + { + EvaluableNode *variable_name_node = vars->GetOrderedChildNodes()[index]; + StringInternPool::StringID sid = EvaluableNode::ToStringIDIfExists(variable_name_node); + if(sid == StringInternPool::NOT_A_STRING_ID) + continue; + + retval->SetMappedChildNode(sid, evaluableNodeManager->AllocNode(ENT_STRING, EvaluableNode::GetCommentsStringId(variable_name_node))); + } + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_RETRIEVE_ENTITY_ROOT(EvaluableNode *en) +{ + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get entity by id parameter if exists + Entity *target_entity = curEntity; + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() > 0) + { + auto id_path_node = InterpretNodeForImmediateUse(ocn[0]); + //if there's a path, then overwrite the entity with something new, otherwise leave as curEntity + if(id_path_node != nullptr) + { + target_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, 
id_path_node); + evaluableNodeManager->FreeNodeTreeIfPossible(id_path_node); + } + } + + if(target_entity == nullptr) + return EvaluableNodeReference::Null(); + + //get second parameter if exists + auto label_escape_increment = EvaluableNodeManager::ENMM_LABEL_ESCAPE_INCREMENT; + if(ocn.size() > 1) + { + auto value = InterpretNodeIntoNumberValue(ocn[1]); + if(value) + label_escape_increment = EvaluableNodeManager::ENMM_NO_CHANGE; + } + + return target_entity->GetRoot(evaluableNodeManager, label_escape_increment); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSIGN_ENTITY_ROOTS_and_ACCUM_ENTITY_ROOTS(EvaluableNode *en) +{ + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + + bool accum = (en->GetType() == ENT_ACCUM_ENTITY_ROOTS); + bool all_assignments_successful = true; + + for(size_t i = 0; i < ocn.size(); i += 2) + { + Entity *target_entity = curEntity; + if(i + 1 < ocn.size()) + { + target_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[i]); + + //if didn't find an entity, then use current one + if(target_entity == nullptr) + { + all_assignments_successful = false; + continue; + } + } + + EvaluableNodeReference new_code = EvaluableNodeReference::Null(); + if(i + 1 < ocn.size()) + new_code = InterpretNode(ocn[i + 1]); + else + new_code = InterpretNode(ocn[i]); + + if(accum) + { + target_entity->AccumRoot(new_code, false, EvaluableNodeManager::ENMM_LABEL_ESCAPE_DECREMENT, writeListeners); + + //accumulate new node usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += EvaluableNode::GetDeepSize(new_code); + } + else + { + size_t prev_size = 0; + if(!AllowUnlimitedExecutionNodes()) + prev_size = target_entity->GetSizeInNodes(); + + target_entity->SetRoot(new_code, false, EvaluableNodeManager::ENMM_LABEL_ESCAPE_DECREMENT, writeListeners); + + if(!AllowUnlimitedExecutionNodes()) + { + size_t cur_size = target_entity->GetSizeInNodes(); + //don't get credit for freeing memory, but do count toward memory consumed + if(cur_size > prev_size) + curNumExecutionNodesAllocatedToEntities += cur_size - prev_size; + } + } + + target_entity->evaluableNodeManager.AdvanceGarbageCollectionTrigger(); + + #ifdef MULTITHREAD_SUPPORT + target_entity->CollectGarbage(&memoryModificationLock); + #else + target_entity->CollectGarbage(); + #endif + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(all_assignments_successful ? 
ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_ENTITY_RAND_SEED(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to retrieve others from + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //get the id of the entity + Entity *entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + if(entity == nullptr) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + //TODO 10975: move this into the entity access above + auto read_lock = entity->CreateEntityLock(); +#endif + + std::string rand_state_string = entity->GetRandomState(); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, rand_state_string), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_ENTITY_RAND_SEED(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_params = ocn.size(); + + if(num_params < 1) + return EvaluableNodeReference::Null(); + + //not allowed if don't have a Entity to retrieve others from + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + //the opcode parameter index of the seed + size_t seed_param_index = 0; + + //get the entity + Entity *entity = curEntity; + if(num_params > 1) + { + entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + seed_param_index++; + } + + if(entity == nullptr) + return EvaluableNodeReference::Null(); + + //retrieve parameter to determine whether to deep set the seeds, if applicable + bool deep_set = true; + if(num_params == 3) + deep_set = InterpretNodeIntoBoolValue(ocn[2], true); + + auto seed_node = InterpretNode(ocn[seed_param_index]); + std::string seed_string; + if(seed_node != nullptr && seed_node->GetType() == ENT_STRING) + seed_string = seed_node->GetStringValue(); + else + seed_string = Parser::Unparse(seed_node, evaluableNodeManager, false, false, true); + +#ifdef MULTITHREAD_SUPPORT + //TODO 10975: move this into the entity access above and, if deep_set is true, lock all contained entities + auto write_lock = entity->CreateEntityLock(); +#endif + + entity->SetRandomState(seed_string, deep_set, writeListeners); + + return seed_node; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_ENTITY_ROOT_PERMISSION(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + //get the id of the entity + Entity *entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[0]); + if(entity == nullptr) + return EvaluableNodeReference::Null(); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(asset_manager.DoesEntityHaveRootPermission(entity) ? 
ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_ENTITY_ROOT_PERMISSION(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + bool permission = InterpretNodeIntoBoolValue(ocn[1]); + + //get the id of the entity + auto id_node = InterpretNode(ocn[0]); + Entity *entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, id_node); + + asset_manager.SetRootPermission(entity, permission); + + return id_node; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CREATE_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + + EvaluableNodeReference new_entity_ids_list(evaluableNodeManager->AllocNode(ENT_LIST), true); + new_entity_ids_list->ReserveOrderedChildNodes((ocn.size() + 1) / 2); + auto node_stack = CreateInterpreterNodeStackStateSaver(new_entity_ids_list); + + for(size_t i = 0; i < ocn.size(); i += 2) + { + //code will be the last parameter + EvaluableNodeReference root = EvaluableNodeReference::Null(); + if(i + 1 == ocn.size()) + root = InterpretNodeForImmediateUse(ocn[i]); + else + root = InterpretNodeForImmediateUse(ocn[i + 1]); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(i + 1 < ocn.size()) + { + node_stack.PushEvaluableNode(root); + InterpretNodeIntoDestinationEntity(ocn[i], destination_entity_parent, new_entity_id); + node_stack.PopEvaluableNode(); + } + + if(destination_entity_parent == nullptr) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //create new entity + Entity *new_entity = new Entity(); + new_entity->SetRoot(root, false, EvaluableNodeManager::ENMM_LABEL_ESCAPE_DECREMENT); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + const std::string &new_entity_id_string = string_intern_pool.GetStringFromID(new_entity_id); + new_entity->SetRandomState(destination_entity_parent->CreateOtherRandomStreamStateViaString(new_entity_id_string), false); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + if(destination_entity_parent == curEntity) + new_entity_ids_list->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id)); + else //need an id list + new_entity_ids_list->AppendOrderedChildNode(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity)); + } + + return new_entity_ids_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CLONE_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + + EvaluableNodeReference new_entity_ids_list(evaluableNodeManager->AllocNode(ENT_LIST), true); + new_entity_ids_list->ReserveOrderedChildNodes((ocn.size() + 1) / 2); + auto node_stack = CreateInterpreterNodeStackStateSaver(new_entity_ids_list); + + for(size_t i = 0; i < ocn.size(); i += 2) + { + //get the id 
of the source entity + Entity *source_entity = InterpretNodeIntoRelativeSourceEntityFromInterpretedEvaluableNodeIDPath(ocn[i]); + //need a source entity, and can't copy self! (that could cause badness) + if(source_entity == nullptr || source_entity == curEntity) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(i + 1 < ocn.size()) + InterpretNodeIntoDestinationEntity(ocn[i + 1], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //create new entity + Entity *new_entity = new Entity(source_entity); + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += new_entity->GetDeepSizeInNodes(); + + destination_entity_parent->AddContainedEntityViaReference(new_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete new_entity; + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + if(destination_entity_parent == curEntity) + new_entity_ids_list->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id)); + else //need an id list + new_entity_ids_list->AppendOrderedChildNode(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, new_entity)); + } + + return new_entity_ids_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MOVE_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + auto &ocn = en->GetOrderedChildNodes(); + + EvaluableNodeReference new_entity_ids_list(evaluableNodeManager->AllocNode(ENT_LIST), true); + new_entity_ids_list->ReserveOrderedChildNodes((ocn.size() + 1) / 2); + auto node_stack = CreateInterpreterNodeStackStateSaver(new_entity_ids_list); + + for(size_t i = 0; i < ocn.size(); i += 2) + { + //get the id of the source entity + auto source_id_node = InterpretNodeForImmediateUse(ocn[i]); + + StringInternRef source_entity_id; + Entity *source_entity_parent = nullptr, *source_entity = nullptr; + TraverseToEntityViaEvaluableNodeIDPath(curEntity, source_id_node, source_entity_parent, source_entity_id, source_entity); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + + if(source_entity == nullptr || source_entity_parent == nullptr || source_entity == curEntity) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(i + 1 < ocn.size()) + InterpretNodeIntoDestinationEntity(ocn[i + 1], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //can't move if being executed + if(source_entity->IsEntityCurrentlyBeingExecuted()) + { + new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + //remove source entity from its parent + source_entity_parent->RemoveContainedEntity(source_entity_id, writeListeners); + + //put it in the destination + destination_entity_parent->AddContainedEntityViaReference(source_entity, new_entity_id, writeListeners); + + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete source_entity; + 
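+			//at this point the source entity has already been removed from its original parent but could
+			// not be registered under the destination (no valid id), so free it and record a null result for this pair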
new_entity_ids_list->AppendOrderedChildNode(nullptr); + continue; + } + + if(destination_entity_parent == curEntity) + new_entity_ids_list->AppendOrderedChildNode(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id)); + else //need an id list + new_entity_ids_list->AppendOrderedChildNode(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, source_entity)); + } + + return new_entity_ids_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DESTROY_ENTITIES(EvaluableNode *en) +{ + //not allowed if don't have a Entity to create within + if(curEntity == nullptr) + return EvaluableNodeReference::Null(); + + bool all_destroys_successful = true; + for(auto &cn : en->GetOrderedChildNodes()) + { + //get the id of the source entity + auto source_id_node = InterpretNodeForImmediateUse(cn); + Entity *source_entity = nullptr, *source_entity_parent = nullptr; + StringInternRef source_id; + TraverseToEntityViaEvaluableNodeIDPath(curEntity, source_id_node, source_entity_parent, source_id, source_entity); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + + //need a source entity, and can't destroy self! (that could cause badness) + if(source_entity == nullptr || source_entity == curEntity) + { + all_destroys_successful = false; + continue; + } + + if(source_entity->IsEntityCurrentlyBeingExecuted()) + { + all_destroys_successful = false; + continue; + } + + if(source_entity_parent != nullptr) + source_entity_parent->RemoveContainedEntity(source_id, writeListeners); + + delete source_entity; + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(all_destroys_successful ? ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LOAD(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::string resource_name = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + if(resource_name == "") + return EvaluableNodeReference::Null(); + + bool escape_filename = false; + if(ocn.size() >= 2) + escape_filename = InterpretNodeIntoBoolValue(ocn[1], false); + + std::string file_type = ""; + if(ocn.size() >= 3) + { + auto [valid, file_type_temp] = InterpretNodeIntoStringValue(ocn[2]); + if(valid) + file_type = file_type_temp; + } + + std::string resource_base_path; + return asset_manager.LoadResourcePath(resource_name, resource_base_path, file_type, evaluableNodeManager, escape_filename); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LOAD_ENTITY_and_LOAD_PERSISTENT_ENTITY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::string resource_name = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + if(resource_name == "") + return EvaluableNodeReference::Null(); + + //get destination if applicable + StringInternRef new_entity_id; + Entity *destination_entity_parent = curEntity; + if(ocn.size() >= 2) + InterpretNodeIntoDestinationEntity(ocn[1], destination_entity_parent, new_entity_id); + if(destination_entity_parent == nullptr) + return EvaluableNodeReference::Null(); + + bool escape_filename = false; + if(ocn.size() >= 3) + escape_filename = InterpretNodeIntoBoolValue(ocn[2], false); + + bool escape_contained_filenames = true; + if(ocn.size() >= 4) + 
escape_contained_filenames = InterpretNodeIntoBoolValue(ocn[3], true); + + bool persistent = (en->GetType() == ENT_LOAD_PERSISTENT_ENTITY); + if(persistent) + escape_contained_filenames = true; + + //persistent doesn't allow file_type + std::string file_type = ""; + if(!persistent && ocn.size() >= 5) + { + auto [valid, file_type_temp] = InterpretNodeIntoStringValue(ocn[4]); + if(valid) + file_type = file_type_temp; + } + + std::string random_seed = destination_entity_parent->CreateOtherRandomStreamStateViaString(resource_name); + Entity *loaded_entity = asset_manager.LoadEntityFromResourcePath(resource_name, file_type, + persistent, true, escape_filename, escape_contained_filenames, random_seed); + + //handle errors + if(loaded_entity == nullptr) + return EvaluableNodeReference::Null(); + if(new_entity_id == StringInternPool::NOT_A_STRING_ID) + { + delete loaded_entity; + return EvaluableNodeReference::Null(); + } + + //accumulate usage + if(!AllowUnlimitedExecutionNodes()) + curNumExecutionNodesAllocatedToEntities += loaded_entity->GetDeepSizeInNodes(); + + //put it in the destination + destination_entity_parent->AddContainedEntityViaReference(loaded_entity, new_entity_id, writeListeners); + + if(destination_entity_parent == curEntity) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, new_entity_id), true); + else //need to return an id list + return EvaluableNodeReference(GetTraversalIDPathListFromAToB(evaluableNodeManager, curEntity, loaded_entity), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_STORE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::string resource_name = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + if(resource_name == "") + return EvaluableNodeReference::Null(); + + auto to_store = InterpretNodeForImmediateUse(ocn[1]); + auto node_stack = CreateInterpreterNodeStackStateSaver(to_store); + + bool escape_filename = false; + if(ocn.size() >= 3) + escape_filename = InterpretNodeIntoBoolValue(ocn[2], false); + + std::string file_type = ""; + if(ocn.size() >= 4) + { + auto [valid, file_type_temp] = InterpretNodeIntoStringValue(ocn[3]); + if(valid) + file_type = file_type_temp; + } + + bool sort_keys = false; + if(ocn.size() >= 5) + { + EvaluableNodeReference params = InterpretNodeForImmediateUse(ocn[4]); + + if(EvaluableNode::IsAssociativeArray(params)) + { + auto &mcn = params->GetMappedChildNodesReference(); + + auto found_sort_keys = mcn.find(ENBISI_sort_keys); + if(found_sort_keys != end(mcn)) + sort_keys = EvaluableNode::IsTrue(found_sort_keys->second); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(params); + } + + std::string resource_base_path; + bool successful_save = asset_manager.StoreResourcePath(to_store, + resource_name, resource_base_path, file_type, evaluableNodeManager, escape_filename, sort_keys); + + evaluableNodeManager->FreeNodeTreeIfPossible(to_store); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(successful_save ? 
ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_STORE_ENTITY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + if(!asset_manager.DoesEntityHaveRootPermission(curEntity)) + return EvaluableNodeReference::Null(); + + std::string resource_name = InterpretNodeIntoStringValueEmptyNull(ocn[0]); + if(resource_name == "") + return EvaluableNodeReference::Null(); + + //get the id of the source entity to store. Don't need to keep the reference because it won't be used once the source entety pointer is looked up + auto source_id_node = InterpretNodeForImmediateUse(ocn[1]); + Entity *source_entity = TraverseToExistingEntityViaEvaluableNodeIDPath(curEntity, source_id_node); + evaluableNodeManager->FreeNodeTreeIfPossible(source_id_node); + + if(source_entity == nullptr || source_entity == curEntity) + return EvaluableNodeReference::Null(); + + bool escape_filename = false; + if(ocn.size() >= 3) + escape_filename = InterpretNodeIntoBoolValue(ocn[2], false); + + bool escape_contained_filenames = true; + if(ocn.size() >= 4) + escape_contained_filenames = InterpretNodeIntoBoolValue(ocn[3], true); + + std::string file_type = ""; + if(ocn.size() >= 5) + { + auto [valid, file_type_temp] = InterpretNodeIntoStringValue(ocn[4]); + if(valid) + file_type = file_type_temp; + } + + bool sort_keys = false; + if(ocn.size() >= 6) + { + EvaluableNodeReference params = InterpretNodeForImmediateUse(ocn[5]); + + if(EvaluableNode::IsAssociativeArray(params)) + { + auto &mcn = params->GetMappedChildNodesReference(); + + auto found_sort_keys = mcn.find(ENBISI_sort_keys); + if(found_sort_keys != end(mcn)) + sort_keys = EvaluableNode::IsTrue(found_sort_keys->second); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(params); + } + + bool stored_successfully = asset_manager.StoreEntityToResourcePath(source_entity, resource_name, file_type, + false, true, escape_filename, escape_contained_filenames, sort_keys); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(stored_successfully ? 
ENT_TRUE : ENT_FALSE), true); +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp b/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp new file mode 100644 index 00000000..80422418 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesListManipulation.cpp @@ -0,0 +1,684 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeDifference.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_FIRST(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //get the "list" itself + auto list = InterpretNodeForImmediateUse(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + if(list->IsOrderedArray()) + { + auto &list_ocn = list->GetOrderedChildNodesReference(); + if(list_ocn.size() > 0) + { + EvaluableNodeReference first(list_ocn[0], list.unique); + if(list.unique && !list->GetNeedCycleCheck()) + { + for(size_t i = 1; i < list_ocn.size(); i++) + evaluableNodeManager->FreeNodeTree(list_ocn[i]); + + evaluableNodeManager->FreeNode(list); + } + return first; + } + } + else if(list->IsAssociativeArray()) + { + auto &list_mcn = list->GetMappedChildNodesReference(); + if(list_mcn.size() > 0) + { + //keep reference to first of map before free rest of it + const auto &first_itr = begin(list_mcn); + EvaluableNode *first_en = first_itr->second; + + if(list.unique && !list->GetNeedCycleCheck()) + { + for(auto &[_, cn] : list_mcn) + { + if(cn != first_en) + evaluableNodeManager->FreeNodeTree(cn); + } + + evaluableNodeManager->FreeNode(list); + } + + return EvaluableNodeReference(first_en, list.unique); + } + } + else //if(list->IsImmediate()) + { + if(DoesEvaluableNodeTypeUseStringData(list->GetType())) + { + auto sid = list->GetStringID(); + if(sid <= string_intern_pool.EMPTY_STRING_ID) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, StringInternPool::NOT_A_STRING_ID), true); + + std::string s = string_intern_pool.GetStringFromID(sid); + size_t utf8_char_length = StringManipulation::GetUTF8CharacterLength(s, 0); + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s.substr(0, utf8_char_length)), true); + } + + if(DoesEvaluableNodeTypeUseNumberData(list->GetType())) + { + //return 0 if zero + double value = list->GetNumberValue(); + if(FastIsNaN(value)) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(std::numeric_limits::quiet_NaN()), true); + + if(value == 0.0) + return list; + + //return 1 if nonzero + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(1.0), true); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TAIL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto list = InterpretNodeForImmediateUse(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + auto node_stack 
= CreateInterpreterNodeStackStateSaver(list); + + //default to tailing to all but the first element + double tail_by = -1; + if(ocn.size() > 1) + tail_by = InterpretNodeIntoNumberValue(ocn[1]); + + if(list->IsOrderedArray()) + { + if(list->GetOrderedChildNodesReference().size() > 0) + { + if(!list.unique) + { + //make a copy so can edit node + list.reference = evaluableNodeManager->AllocNode(list); + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(list); + } + + auto &list_ocn = list->GetOrderedChildNodesReference(); + //remove the first element(s) + if(tail_by > 0 && tail_by < list_ocn.size()) + { + double first_index = list_ocn.size() - tail_by; + list_ocn.erase(begin(list_ocn), begin(list_ocn) + static_cast(first_index)); + } + else if(tail_by < 0) + { + //make sure have things to remove while keeping something in the list + if(-tail_by < list_ocn.size()) + list_ocn.erase(begin(list_ocn), begin(list_ocn) + static_cast(-tail_by)); + else //remove everything + list_ocn.clear(); + } + + return list; + } + } + else if(list->IsAssociativeArray()) + { + if(list->GetMappedChildNodesReference().size() > 0) + { + if(!list.unique) + { + //make a copy so can edit node + list.reference = evaluableNodeManager->AllocNode(list); + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(list); + } + + //just remove the first, because it's more efficient and the order does not matter for maps + size_t num_to_remove = 0; + if(tail_by > 0 && tail_by < list->GetMappedChildNodesReference().size()) + num_to_remove = list->GetMappedChildNodesReference().size() - static_cast(tail_by); + else if(tail_by < 0) + num_to_remove = static_cast(-tail_by); + + //remove individually + for(size_t i = 0; list->GetMappedChildNodesReference().size() > 0 && i < num_to_remove; i++) + { + const auto &mcn = list->GetMappedChildNodesReference(); + const auto &iter = begin(mcn); + list->EraseMappedChildNode(iter->first); + } + + return list; + } + } + else //list->IsImmediate() + { + if(DoesEvaluableNodeTypeUseStringData(list->GetType())) + { + auto sid = list->GetStringID(); + if(sid <= string_intern_pool.EMPTY_STRING_ID) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, StringInternPool::NOT_A_STRING_ID), true); + + std::string s = string_intern_pool.GetStringFromID(sid); + + //remove the first element(s) + size_t num_chars_to_drop = 0; + if(tail_by > 0) + { + size_t num_characters = StringManipulation::GetNumUTF8Characters(s); + //cap because can't remove a negative number of characters + num_chars_to_drop = static_cast(std::max(0.0, num_characters - tail_by)); + } + else if(tail_by < 0) + { + num_chars_to_drop = static_cast(-tail_by); + } + + //drop the number of characters before this length + size_t utf8_start_offset = StringManipulation::GetNthUTF8CharacterOffset(s, num_chars_to_drop); + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s.substr(utf8_start_offset, s.size() - utf8_start_offset)), true); + } + + if(DoesEvaluableNodeTypeUseNumberData(list->GetType())) + { + //return 0 if zero + double value = list->GetNumberValue(); + if(value == 0.0) + return list; + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + //return (value - 1.0) if nonzero + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value - 1.0), true); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference 
Interpreter::InterpretNode_ENT_LAST(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //get the list itself + auto list = InterpretNodeForImmediateUse(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + if(list->IsOrderedArray()) + { + auto &list_ocn = list->GetOrderedChildNodesReference(); + if(list_ocn.size() > 0) + { + //keep reference to first before free rest of it + EvaluableNodeReference last(list_ocn[list_ocn.size() - 1], list.unique); + + if(list.unique && !list->GetNeedCycleCheck()) + { + for(size_t i = 0; i < list_ocn.size() - 1; i++) + evaluableNodeManager->FreeNodeTree(list_ocn[i]); + + evaluableNodeManager->FreeNode(list); + } + return last; + } + } + else if(list->IsAssociativeArray()) + { + auto &list_mcn = list->GetMappedChildNodes(); + if(list_mcn.size() > 0) + { + //just take the first, because it's more efficient and the order does not matter for maps + //keep reference to first of map before free rest of it + EvaluableNode *last_en = begin(list_mcn)->second; + + if(list.unique && !list->GetNeedCycleCheck()) + { + for(auto &[_, cn] : list_mcn) + { + if(cn != last_en) + evaluableNodeManager->FreeNodeTree(cn); + } + + evaluableNodeManager->FreeNode(list); + } + + return EvaluableNodeReference(last_en, list.unique); + } + } + else //list->IsImmediate() + { + if(DoesEvaluableNodeTypeUseStringData(list->GetType())) + { + auto sid = list->GetStringID(); + if(sid <= string_intern_pool.EMPTY_STRING_ID) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, StringInternPool::NOT_A_STRING_ID), true); + + std::string s = string_intern_pool.GetStringFromID(sid); + + auto [utf8_char_start_offset, utf8_char_length] = StringManipulation::GetLastUTF8CharacterOffsetAndLength(s); + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s.substr(utf8_char_start_offset, utf8_char_length)), true); + } + + if(DoesEvaluableNodeTypeUseNumberData(list->GetType())) + { + //return 0 if zero + double value = list->GetNumberValue(); + if(value == 0.0) + return list; + + //return 1 if nonzero + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(1.0), true); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TRUNC(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto list = InterpretNodeForImmediateUse(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(list); + + //default to truncating to all but the last element + double truncate_to = -1; + if(ocn.size() > 1) + truncate_to = InterpretNodeIntoNumberValue(ocn[1]); + + if(list->IsOrderedArray()) + { + if(!list.unique) + { + //make a copy so can edit node + list.reference = evaluableNodeManager->AllocNode(list); + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(list); + } + + auto &list_ocn = list->GetOrderedChildNodesReference(); + + //remove the last element(s) + if(truncate_to > 0 && truncate_to < list_ocn.size()) + { + list->GetOrderedChildNodes().erase(begin(list_ocn) + static_cast(truncate_to), end(list_ocn)); + } + else if(truncate_to < 0) + { + //make sure have things to remove while keeping 
something in the list + if(-truncate_to < list_ocn.size()) + { + size_t last_index = static_cast(truncate_to + list->GetOrderedChildNodes().size()); + list_ocn.erase(begin(list_ocn) + last_index, end(list_ocn)); + } + else //remove everything + list_ocn.clear(); + } + + return list; + } + else if(list->IsAssociativeArray()) + { + if(!list.unique) + { + //make a copy so can edit node + list.reference = evaluableNodeManager->AllocNode(list); + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(list); + } + + //just remove the first, because it's more efficient and the order does not matter for maps + size_t num_to_remove = 0; + if(truncate_to > 0 && truncate_to < list->GetMappedChildNodesReference().size()) + num_to_remove = list->GetMappedChildNodesReference().size() - static_cast(truncate_to); + else if(truncate_to < 0) + num_to_remove = static_cast(-truncate_to); + + //remove individually + for(size_t i = 0; list->GetMappedChildNodesReference().size() > 0 && i < num_to_remove; i++) + { + const auto &mcn = list->GetMappedChildNodesReference(); + const auto &iter = begin(mcn); + list->EraseMappedChildNode(iter->first); + } + + return list; + } + else //if(list->IsImmediate()) + { + if(DoesEvaluableNodeTypeUseStringData(list->GetType())) + { + auto sid = list->GetStringID(); + if(sid <= string_intern_pool.EMPTY_STRING_ID) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, StringInternPool::NOT_A_STRING_ID), true); + + std::string s = string_intern_pool.GetStringFromID(sid); + + //remove the last element(s) + size_t num_chars_to_keep = 0; + if(truncate_to > 0) + { + num_chars_to_keep = static_cast(truncate_to); + } + else if(truncate_to < 0) + { + size_t num_characters = StringManipulation::GetNumUTF8Characters(s); + + //cap because can't remove a negative number of characters, and add truncate_to because truncate_to is negative (technically want a subtract) + num_chars_to_keep = static_cast(std::max(0.0, num_characters + truncate_to)); + } + + //remove everything after after this length + size_t utf8_end_offset = StringManipulation::GetNthUTF8CharacterOffset(s, num_chars_to_keep); + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_STRING, s.substr(0, utf8_end_offset)), true); + } + + if(DoesEvaluableNodeTypeUseNumberData(list->GetType())) + { + //return 0 if zero + double value = list->GetNumberValue(); + if(value == 0.0) + return list; + + //return (value - 1.0) if nonzero + evaluableNodeManager->FreeNodeTreeIfPossible(list); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value - 1.0), true); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_APPEND(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference new_list(evaluableNodeManager->AllocNode(ENT_LIST), true); + auto node_stack = CreateInterpreterNodeStackStateSaver(new_list); + + size_t new_list_cur_index = 0; + size_t num_non_unique_child_nodes = 0; + for(auto ¶m : ocn) + { + if(AreExecutionResourcesExhausted()) + return EvaluableNodeReference::Null(); + + //get evaluated parameter + auto new_elements = InterpretNode(param); + + //keep track of the attributes for new_list + if(!new_elements.unique) + num_non_unique_child_nodes++; + new_list.UpdatePropertiesBasedOnAttachedNode(new_elements); + + 
if(EvaluableNode::IsAssociativeArray(new_elements)) + { + if(new_list->GetType() == ENT_LIST) + new_list->ConvertOrderedListToNumberedAssoc(); + + for(auto &[node_to_insert_id, node_to_insert] : new_elements->GetMappedChildNodesReference()) + { + //clobber if already exist, leave for garbage collection + new_list->SetMappedChildNode(node_to_insert_id, node_to_insert); + } + + //don't need the top node anymore + evaluableNodeManager->FreeNodeIfPossible(new_elements); + } + else if(new_elements != nullptr && new_elements->GetType() == ENT_LIST) + { + auto &new_elements_ocn = new_elements->GetOrderedChildNodesReference(); + if(new_list->GetType() == ENT_LIST) + new_list->GetOrderedChildNodes().insert(end(new_list->GetOrderedChildNodes()), begin(new_elements_ocn), end(new_elements_ocn)); + else + { + //find the lowest unused index number + for(size_t i = 0; i < new_elements_ocn.size(); i++, new_list_cur_index++) + { + //look for first index not used + std::string index_string = EvaluableNode::NumberToString(new_list_cur_index); + EvaluableNode **found = new_list->GetMappedChildNode(index_string); + if(found != nullptr) + { + i--; //try this again with the next index + continue; + } + new_list->SetMappedChildNode(index_string, new_elements_ocn[i]); + } + } + + //don't need the top node anymore + evaluableNodeManager->FreeNodeIfPossible(new_elements); + } + else //not a map or list, just append the element singularly + { + if(new_list->GetType() == ENT_LIST) + new_list->AppendOrderedChildNode(new_elements); + else + { + //find the next unused index + std::string index_string; + do { + index_string = EvaluableNode::NumberToString(static_cast(new_list_cur_index++)); + } while(new_list->GetMappedChildNode(index_string) != nullptr); + + new_list->SetMappedChildNode(index_string, new_elements); + } + } + + } //for each child node to append + + //if there is more than one non-unique child node, they could theoretically be the same node + // so therefore force a cycle check + if(num_non_unique_child_nodes > 1) + new_list->SetNeedCycleCheck(true); + + return new_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SIZE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto cur = InterpretNodeForImmediateUse(ocn[0]); + size_t size = 0; + if(cur != nullptr) + { + if(cur->GetType() == ENT_STRING) + { + const auto &s = cur->GetStringValue(); + size = StringManipulation::GetNumUTF8Characters(s); + } + else + { + size = cur->GetNumChildNodes(); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(static_cast(size)), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_RANGE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_params = ocn.size(); + + if(num_params < 2) + return EvaluableNodeReference::Null(); + + //get the index of the start index based on how many parameters there are, if there is a function + size_t index_of_start = (num_params < 4 ? 
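When append targets an assoc, each new list element is stored under the lowest unused numeric key, probing past keys that are already taken. A small stand-in for that probing loop (std::map with string keys plays the role of the node's mapped children; names are illustrative):

#include <cstddef>
#include <map>
#include <string>
#include <vector>

//insert each value under the smallest numeric key, rendered as a string, that is not yet in use
static void appendWithNumberedKeys(std::map<std::string, double> &assoc, const std::vector<double> &values)
{
	std::size_t next_index = 0;
	for(double v : values)
	{
		std::string key;
		do { key = std::to_string(next_index++); } while(assoc.count(key) > 0);
		assoc[key] = v;
	}
}

For an assoc that already holds keys "0" and "2", appending two values fills "1" and then "3".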
0 : 1); + + double range_start = InterpretNodeIntoNumberValue(ocn[index_of_start + 0]); + double range_end = InterpretNodeIntoNumberValue(ocn[index_of_start + 1]); + + if(FastIsNaN(range_start) || FastIsNaN(range_end)) + return EvaluableNodeReference::Null(); + + //default step size + double range_step_size = 1; + if(range_end < range_start) + range_step_size = -1; + + //if specified step size, get and make sure it's ok + if(num_params > 2) + { + range_step_size = InterpretNodeIntoNumberValue(ocn[index_of_start + 2]); + if(FastIsNaN(range_step_size)) + return EvaluableNodeReference::Null(); + + //if not a good size, return empty list + if(!(range_start <= range_end && range_step_size > 0) + && !(range_end <= range_start && range_step_size < 0)) + { + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + } + } + + size_t num_nodes = static_cast((range_end - range_start) / range_step_size) + 1; + + //make sure not eating up too much memory + if(!AllowUnlimitedExecutionNodes() && curNumExecutionNodes + num_nodes >= maxNumExecutionNodes) + { + curNumExecutionNodes = maxNumExecutionNodes; + //also make it fail by adding to the cumulative allocation pool reserved for entities, in case curNumExecutionNodes is recalculated + curNumExecutionNodesAllocatedToEntities = maxNumExecutionNodes; + return EvaluableNodeReference::Null(); + } + + //if no function, just return a list of numbers + if(index_of_start == 0) + { + EvaluableNodeReference range_list(evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_NUMBER, num_nodes), true); + + auto &range_list_ocn = range_list->GetOrderedChildNodes(); + for(size_t i = 0; i < num_nodes; i++) + range_list_ocn[i]->SetNumberValue(i * range_step_size + range_start); + + return range_list; + } + + //if a function is specified, then set up appropriate data structures to call the function and move the indices for the index and value parameters + EvaluableNodeReference function = InterpretNode(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + EvaluableNodeReference result(evaluableNodeManager->AllocNode(ENT_LIST), true); + auto &list_ocn = result->GetOrderedChildNodesReference(); + list_ocn.resize(num_nodes); + +#ifdef MULTITHREAD_SUPPORT + if(en->GetConcurrency() && num_nodes > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + node_stack.PushEvaluableNode(result); + + ConcurrencyManager concurrency_manager(this, num_nodes); + + for(size_t node_index = 0; node_index < num_nodes; node_index++) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(function, + nullptr, result, EvaluableNodeImmediateValueWithType(node_index * range_step_size + range_start), nullptr); + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + //filter by those child nodes that are true + auto evaluations = concurrency_manager.GetResultsAndFreeReferences(); + auto &result_ocn = result->GetOrderedChildNodes(); + for(size_t i = 0; i < num_nodes; i++) + { + result_ocn[i] = evaluations[i]; + result.UpdatePropertiesBasedOnAttachedNode(evaluations[i]); + } + + return result; + } + } +#endif + + PushNewConstructionContext(nullptr, result, EvaluableNodeImmediateValueWithType(0.0), nullptr); + + auto &result_ocn = result->GetOrderedChildNodesReference(); + for(size_t i = 0; i < num_nodes; i++) + { + //pass index of list to be mapped -- leave value at nullptr + SetTopTargetValueIndexInConstructionStack(i * range_step_size 
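The element count for range follows directly from the operands as num_nodes = trunc((end - start) / step) + 1, with the default step's sign taken from the direction of the interval. A sketch of the no-function path under those assumptions (standard library only; names are illustrative):

#include <cmath>
#include <cstddef>
#include <vector>

//generate start, start + step, ... toward end, inclusive when a value lands exactly on end
static std::vector<double> makeRange(double start, double end, double step)
{
	std::vector<double> out;
	if(std::isnan(start) || std::isnan(end) || std::isnan(step) || step == 0.0)
		return out;
	//a step pointing away from end yields an empty result, mirroring the opcode's empty list
	if(!((start <= end && step > 0) || (end <= start && step < 0)))
		return out;

	std::size_t num_nodes = static_cast<std::size_t>((end - start) / step) + 1;
	out.reserve(num_nodes);
	for(std::size_t i = 0; i < num_nodes; i++)
		out.push_back(i * step + start);
	return out;
}

For example, makeRange(0, 10, 3) yields {0, 3, 6, 9}.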
+ range_start); + + EvaluableNodeReference element_result = InterpretNode(function); + result_ocn[i] = element_result; + result.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + + return result; +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesLogic.cpp b/src/Amalgam/interpreter/InterpreterOpcodesLogic.cpp new file mode 100644 index 00000000..62007e52 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesLogic.cpp @@ -0,0 +1,634 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeDifference.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_AND(EvaluableNode *en) +{ + EvaluableNodeReference cur = EvaluableNodeReference::Null(); + auto &ocn = en->GetOrderedChildNodes(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cn : interpreted_nodes) + { + //free the previous node if applicable + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + cur = cn; + + if(!EvaluableNode::IsTrue(cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + } + + return cur; + } +#endif + + for(auto &cn : ocn) + { + //free the previous node if applicable + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + cur = InterpretNode(cn); + + if(!EvaluableNode::IsTrue(cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + } + return cur; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_OR(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cur : interpreted_nodes) + { + //if it is a valid node and it is not zero, then return it + if(EvaluableNode::IsTrue(cur)) + return cur; + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } +#endif + + for(auto &cn : ocn) + { + auto cur = InterpretNode(cn); + + //if it is a valid node and it is not zero, then return it + if(EvaluableNode::IsTrue(cur)) + return cur; + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_XOR(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + size_t num_true = 0; + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cur : interpreted_nodes) + { + //if it's true, count it + if(EvaluableNode::IsTrue(cur)) + num_true++; + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + //if an odd number of true arguments, then return true + return 
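As in many Lisp-family languages, the logical opcodes above return operand values rather than bare booleans: in the serial path, and stops at the first falsey operand and otherwise yields its last operand, while or yields the first truthy one. A rough scalar analog with 0.0 standing in for false (helper names are illustrative):

#include <vector>

//short-circuit conjunction: any falsey operand wins, otherwise the last operand is returned
static double logicalAnd(const std::vector<double> &args)
{
	double last = 0.0;
	for(double a : args)
	{
		if(a == 0.0)
			return 0.0;
		last = a;
	}
	return last;
}

//short-circuit disjunction: the first truthy operand is returned as-is
static double logicalOr(const std::vector<double> &args)
{
	for(double a : args)
	{
		if(a != 0.0)
			return a;
	}
	return 0.0;
}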
EvaluableNodeReference(evaluableNodeManager->AllocNode((num_true % 2 == 1) ? ENT_TRUE : ENT_FALSE), true); + } +#endif + + //count number of true values + for(auto &cn : ocn) + { + if(InterpretNodeIntoBoolValue(cn)) + num_true++; + } + + //if an odd number of true arguments, then return true + return EvaluableNodeReference(evaluableNodeManager->AllocNode((num_true % 2 == 1) ? ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_NOT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + auto cur = InterpretNodeForImmediateUse(ocn[0]); + + bool is_true = EvaluableNode::IsTrue(cur); + + if(cur.unique && cur != nullptr) + cur->ClearAndSetType(is_true ? ENT_FALSE : ENT_TRUE); + else + cur = EvaluableNodeReference(evaluableNodeManager->AllocNode(is_true ? ENT_FALSE : ENT_TRUE), true); + + return cur; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_EQUAL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + bool processed_first_value = false; + EvaluableNodeReference to_match = EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cur : interpreted_nodes) + { + //if haven't gotten a value yet, then use this as the first data + if(!processed_first_value) + { + to_match = cur; + processed_first_value = true; + continue; + } + + if(!EvaluableNode::AreDeepEqual(to_match, cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); + } +#endif + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + for(auto &cn : ocn) + { + auto cur = InterpretNodeForImmediateUse(cn); + + //if haven't gotten a value yet, then use this as the first data + if(!processed_first_value) + { + to_match = cur; + node_stack.PushEvaluableNode(to_match); + processed_first_value = true; + continue; + } + + if(!EvaluableNode::AreDeepEqual(to_match, cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_NEQUAL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + bool all_not_equal = true; + for(size_t i = 0; i < interpreted_nodes.size(); i++) + { + //don't compare versus self, and skip any previously compared against + for(size_t j = i + 1; j < interpreted_nodes.size(); j++) + { + //if they're equal, then it fails + if(EvaluableNode::AreDeepEqual(interpreted_nodes[i], interpreted_nodes[j])) + { + all_not_equal = false; + + 
//break out of loop + i = interpreted_nodes.size(); + break; + } + } + } + + for(size_t i = 0; i < interpreted_nodes.size(); i++) + evaluableNodeManager->FreeNodeTreeIfPossible(interpreted_nodes[i]); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(all_not_equal ? ENT_TRUE : ENT_FALSE), true); + } +#endif + + //special (faster) case for comparing two + if(ocn.size() == 2) + { + EvaluableNodeReference a = InterpretNodeForImmediateUse(ocn[0]); + + auto node_stack = CreateInterpreterNodeStackStateSaver(a); + EvaluableNodeReference b = InterpretNodeForImmediateUse(ocn[1]); + + bool a_b_not_equal = (!EvaluableNode::AreDeepEqual(a, b)); + evaluableNodeManager->FreeNodeTreeIfPossible(a); + evaluableNodeManager->FreeNodeTreeIfPossible(b); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(a_b_not_equal ? ENT_TRUE : ENT_FALSE), true); + } + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + //get the value for each node + std::vector values; + values.reserve(ocn.size()); + for(size_t i = 0; i < ocn.size(); i++) + { + values.push_back(InterpretNodeForImmediateUse(ocn[i])); + node_stack.PushEvaluableNode(values[i]); + } + + bool all_not_equal = true; + for(size_t i = 0; i < values.size(); i++) + { + //don't compare versus self, and skip any previously compared against + for(size_t j = i + 1; j < values.size(); j++) + { + //if they're equal, then it fails + if(EvaluableNode::AreDeepEqual(values[i], values[j])) + { + all_not_equal = false; + + //break out of loop + i = values.size(); + break; + } + } + } + + for(size_t i = 0; i < values.size(); i++) + evaluableNodeManager->FreeNodeTreeIfPossible(values[i]); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(all_not_equal ? ENT_TRUE : ENT_FALSE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LESS_and_LEQUAL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //if none or one node, then there's no order + if(ocn.size() < 2) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + EvaluableNodeReference prev = interpreted_nodes[0]; + if(EvaluableNode::IsNaN(prev)) + { + for(auto &n : interpreted_nodes) + evaluableNodeManager->FreeNodeTreeIfPossible(n); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + EvaluableNodeType return_type = ENT_TRUE; + + for(size_t i = 1; i < interpreted_nodes.size(); i++) + { + //if not in strict increasing order, return false + auto &cur = interpreted_nodes[i]; + + if(EvaluableNode::IsNaN(cur)) + { + return_type = ENT_FALSE; + break; + } + + if(!EvaluableNode::IsLessThan(prev, cur, en->GetType() == ENT_LEQUAL)) + { + return_type = ENT_FALSE; + break; + } + } + + for(auto &n : interpreted_nodes) + evaluableNodeManager->FreeNodeTreeIfPossible(n); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(return_type), true); + } +#endif + + auto prev = InterpretNodeForImmediateUse(ocn[0]); + if(EvaluableNode::IsEmptyNode(prev)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + auto node_stack = CreateInterpreterNodeStackStateSaver(prev); + + for(size_t i = 1; i < ocn.size(); i++) + { + //if not in strict increasing order, return false + auto cur = 
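The general case of the not-equal opcode is an all-pairs distinctness test: it is true only when every pair of evaluated operands is deep-unequal, which is why the loops above are quadratic. A compact stand-in over plain strings (operator== standing in for EvaluableNode::AreDeepEqual):

#include <cstddef>
#include <string>
#include <vector>

//true only if no two operands compare equal to each other
static bool allPairwiseDistinct(const std::vector<std::string> &values)
{
	for(std::size_t i = 0; i < values.size(); i++)
	{
		for(std::size_t j = i + 1; j < values.size(); j++)
		{
			if(values[i] == values[j])
				return false;
		}
	}
	return true;
}

So the three operands "a", "b", "a" fail the test even though each adjacent pair differs.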
InterpretNodeForImmediateUse(ocn[i]); + + if(EvaluableNode::IsEmptyNode(cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + if(!EvaluableNode::IsLessThan(prev, cur, en->GetType() == ENT_LEQUAL)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + prev = cur; + + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(prev); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + + //nothing is out of order + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GREATER_and_GEQUAL(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + //if none or one node, then it's in order + if(ocn.size() < 2) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + EvaluableNodeReference prev = interpreted_nodes[0]; + if(EvaluableNode::IsNaN(prev)) + { + for(auto &n : interpreted_nodes) + evaluableNodeManager->FreeNodeTreeIfPossible(n); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + EvaluableNodeType return_type = ENT_TRUE; + + for(size_t i = 1; i < interpreted_nodes.size(); i++) + { + //if not in strict increasing order, return false + auto &cur = interpreted_nodes[i]; + + if(EvaluableNode::IsNaN(cur)) + { + return_type = ENT_FALSE; + break; + } + + if(!EvaluableNode::IsLessThan(cur, prev, en->GetType() == ENT_GEQUAL)) + { + return_type = ENT_FALSE; + break; + } + } + + for(auto &n : interpreted_nodes) + evaluableNodeManager->FreeNodeTreeIfPossible(n); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(return_type), true); + } +#endif + + auto prev = InterpretNodeForImmediateUse(ocn[0]); + if(EvaluableNode::IsEmptyNode(prev)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + auto node_stack = CreateInterpreterNodeStackStateSaver(prev); + + for(size_t i = 1; i < ocn.size(); i++) + { + //if not in strict increasing order, return false + auto cur = InterpretNodeForImmediateUse(ocn[i]); + + if(EvaluableNode::IsEmptyNode(cur)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + if(!EvaluableNode::IsLessThan(cur, prev, en->GetType() == ENT_GEQUAL)) + { + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + prev = cur; + + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(prev); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(prev); + + //nothing is out of order + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); +} + +EvaluableNodeReference 
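The ordering opcodes check an entire chain rather than a single pair: every adjacent pair of operands must be ordered (strictly for the non-equal variants), and a NaN or null operand anywhere makes the result false. A numeric sketch of the chained check (illustrative only; the opcode itself returns null when given no operands):

#include <cmath>
#include <cstddef>
#include <vector>

//true if values form an increasing chain; allow_equal selects <= semantics over <
static bool chainedLess(const std::vector<double> &values, bool allow_equal)
{
	if(values.size() < 2)
		return false;
	for(std::size_t i = 1; i < values.size(); i++)
	{
		if(std::isnan(values[i - 1]) || std::isnan(values[i]))
			return false;
		if(allow_equal ? (values[i - 1] > values[i]) : (values[i - 1] >= values[i]))
			return false;
	}
	return true;
}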
Interpreter::InterpretNode_ENT_TYPE_EQUALS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + bool processed_first_value = false; + EvaluableNodeReference to_match = EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cur : interpreted_nodes) + { + //if haven't gotten a value yet, then use this as the first data + if(!processed_first_value) + { + to_match = cur; + processed_first_value = true; + continue; + } + + EvaluableNodeType cur_type = ENT_NULL; + if(cur != nullptr) + cur_type = cur->GetType(); + + EvaluableNodeType to_match_type = ENT_NULL; + if(to_match != nullptr) + to_match_type = to_match->GetType(); + + if(cur_type != to_match_type) + { + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); + } +#endif + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + for(auto &cn : ocn) + { + auto cur = InterpretNodeForImmediateUse(cn); + + //if haven't gotten a value yet, then use this as the first data + if(!processed_first_value) + { + to_match = cur; + node_stack.PushEvaluableNode(to_match); + processed_first_value = true; + continue; + } + + EvaluableNodeType cur_type = ENT_NULL; + if(cur != nullptr) + cur_type = cur->GetType(); + + EvaluableNodeType to_match_type = ENT_NULL; + if(to_match != nullptr) + to_match_type = to_match->GetType(); + + if(cur_type != to_match_type) + { + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(cur); + } + + evaluableNodeManager->FreeNodeTreeIfPossible(to_match); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_TRUE), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TYPE_NEQUALS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeType result_type = ENT_TRUE; + + std::vector values(ocn.size()); + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + //evaluate all nodes just once + for(size_t i = 0; i < ocn.size(); i++) + { + values[i] = InterpretNodeForImmediateUse(ocn[i]); + node_stack.PushEvaluableNode(values[i]); + } + + for(size_t i = 0; i < ocn.size(); i++) + { + //start at next higher, because comparisons are symmetric and don't need to compare with self + for(size_t j = i + 1; j < ocn.size(); j++) + { + EvaluableNode *cur1 = values[i]; + EvaluableNode *cur2 = values[j]; + + //if they're equal, then it fails + if((cur1 == nullptr && cur2 == nullptr) || (cur1 != nullptr && cur2 != nullptr && cur1->GetType() == cur2->GetType())) + { + result_type = ENT_FALSE; + + //break out of loop + i = ocn.size(); + break; + } + } + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(result_type), true); +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesMath.cpp b/src/Amalgam/interpreter/InterpreterOpcodesMath.cpp new file mode 100644 index 
00000000..4315ca13 --- /dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesMath.cpp @@ -0,0 +1,1344 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityQueryBuilder.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeDifference.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ADD(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + double value = 0.0; + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cn : interpreted_nodes) + value += EvaluableNode::ToNumber(cn); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } +#endif + + for(auto &cn : ocn) + value += InterpretNodeIntoNumberValue(cn); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SUBTRACT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + double value = EvaluableNode::ToNumber(interpreted_nodes[0]); + for(size_t i = 1; i < ocn.size(); i++) + value -= EvaluableNode::ToNumber(interpreted_nodes[i]); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } +#endif + + double value = InterpretNodeIntoNumberValue(ocn[0]); + for(size_t i = 1; i < ocn.size(); i++) + value -= InterpretNodeIntoNumberValue(ocn[i]); + + //if just one parameter, then treat as negative + if(ocn.size() == 1) + value = -value; + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MULTIPLY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + double value = 1.0; + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(auto &cn : interpreted_nodes) + value *= EvaluableNode::ToNumber(cn); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } +#endif + + for(auto &cn : ocn) + value *= InterpretNodeIntoNumberValue(cn); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DIVIDE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + double value = EvaluableNode::ToNumber(interpreted_nodes[0]); + for(size_t i = 1; i < interpreted_nodes.size(); i++) + { + double divisor = EvaluableNode::ToNumber(interpreted_nodes[i]); + + if(divisor != 0.0) + value /= divisor; + else + { + if(value > 0.0) + value = std::numeric_limits::infinity(); + else if(value < 0.0) + value = -std::numeric_limits::infinity(); 
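Division by zero does not raise an error; it collapses the running value to positive infinity, negative infinity, or NaN depending on the numerator's sign and stops consuming further divisors, as in the branch above. The same fold as a standalone sketch (the empty-input NaN is only a placeholder; the opcode itself returns null for zero operands):

#include <cstddef>
#include <limits>
#include <vector>

//left-fold division; a zero divisor saturates the result and ends the fold
static double divideAll(const std::vector<double> &args)
{
	if(args.empty())
		return std::numeric_limits<double>::quiet_NaN();

	double value = args[0];
	for(std::size_t i = 1; i < args.size(); i++)
	{
		double divisor = args[i];
		if(divisor != 0.0)
		{
			value /= divisor;
			continue;
		}

		if(value > 0.0)
			value = std::numeric_limits<double>::infinity();
		else if(value < 0.0)
			value = -std::numeric_limits<double>::infinity();
		else
			value = std::numeric_limits<double>::quiet_NaN();
		break;
	}
	return value;
}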
+ else + value = std::numeric_limits::quiet_NaN(); + + break; + } + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } +#endif + + double value = InterpretNodeIntoNumberValue(ocn[0]); + for(size_t i = 1; i < ocn.size(); i++) + { + double divisor = InterpretNodeIntoNumberValue(ocn[i]); + + if(divisor != 0.0) + value /= divisor; + else + { + if(value > 0.0) + value = std::numeric_limits::infinity(); + else if(value < 0.0) + value = -std::numeric_limits::infinity(); + else + value = std::numeric_limits::quiet_NaN(); + + break; + } + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MODULUS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + double value = EvaluableNode::ToNumber(interpreted_nodes[0]); + for(size_t i = 1; i < interpreted_nodes.size(); i++) + { + double mod = EvaluableNode::ToNumber(interpreted_nodes[i]); + value = std::fmod(value, mod); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } +#endif + + double value = InterpretNodeIntoNumberValue(ocn[0]); + for(size_t i = 1; i < ocn.size(); i++) + { + double mod = InterpretNodeIntoNumberValue(ocn[i]); + value = std::fmod(value, mod); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +//helper method for InterpretNode_ENT_GET_DIGITS and InterpretNode_ENT_SET_DIGITS +//if relative_to_zero the digits are indexed as +// 5 4 3 2 1 0 . -1 -2 +//if not relative_to_zero, the digits are indexed as +// 0 1 2 3 4 5 . 
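The digit-indexing comment above is easiest to read with a worked example: with relative_to_zero, index 0 is the ones place, positive indices move left of the decimal point, and negative indices move right of it, so for 1234.56 in base 10 the digits at indices 3, 0, and -1 are 1, 4, and 5. The underlying place-value arithmetic, subject to ordinary floating-point rounding (helper name is illustrative):

#include <cmath>
#include <cstdio>

//digit of value at zero-relative position index in the given base
static double digitAt(double value, double base, double index)
{
	double place_value = std::pow(base, index);
	return std::fmod(std::floor(value / place_value), base);
}

int main()
{
	//prints 1 4 5 for 1234.56 in base 10
	std::printf("%g %g %g\n", digitAt(1234.56, 10, 3), digitAt(1234.56, 10, 0), digitAt(1234.56, 10, -1));
	return 0;
}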
6 7 +//for a given value and a base of the digits, sets first_digit, start_digit, and end_digit to be relative to zero +//accepts infinities and NaNs and still sets them appropriately +//first_digit is the first digit in the number (most significant), start_digit and end_digit are the digits selected +//if first_digit does not need to be computed, then it will be left unchanged +inline void NormalizeStartAndEndDigitToZerosPlace(double value, double base, bool relative_to_zero, + double &first_digit, double &start_digit, double &end_digit) +{ + //compute max_num_digits using data on how the numbers are stored + constexpr size_t max_num_storage_digits = std::numeric_limits::digits; + constexpr size_t storage_radix = std::numeric_limits::radix; + double max_num_digits = (storage_radix / base) * max_num_storage_digits; + + if(relative_to_zero) + { + //if start is infinite, start at top + if(start_digit == std::numeric_limits::infinity() || FastIsNaN(start_digit)) + { + first_digit = std::floor(std::log(value) / std::log(base)); + start_digit = first_digit; + } + + //if end is negative infinite, start at end + if(end_digit == std::numeric_limits::infinity() || FastIsNaN(end_digit)) + end_digit = start_digit - max_num_digits; + } + else //not relative to zero + { + first_digit = std::floor(std::log(value) / std::log(base)); + start_digit = first_digit - start_digit; + + if(end_digit == std::numeric_limits::infinity() || FastIsNaN(end_digit)) + end_digit = start_digit - max_num_digits; + else //valid position + end_digit = first_digit - end_digit; + } + + //make sure only use valid digits + if(end_digit < start_digit - max_num_digits) + end_digit = start_digit - max_num_digits; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GET_DIGITS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_params = ocn.size(); + if(num_params == 0) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + + double value = InterpretNodeIntoNumberValue(ocn[0]); + //negative numbers have the same digits + value = std::abs(value); + if(FastIsNaN(value) || value == std::numeric_limits::infinity()) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + + double base = 10; + if(num_params > 1) + { + base = InterpretNodeIntoNumberValue(ocn[1]); + if(base <= 0) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + } + + bool relative_to_zero = true; + if(num_params > 4) + relative_to_zero = InterpretNodeIntoBoolValue(ocn[4]); + + double start_digit = (relative_to_zero ? std::numeric_limits::infinity() : 0); + if(num_params > 2) + start_digit = InterpretNodeIntoNumberValue(ocn[2]); + + double end_digit = (relative_to_zero ? 
-std::numeric_limits::infinity() : std::numeric_limits::infinity()); + if(num_params > 3) + end_digit = InterpretNodeIntoNumberValue(ocn[3]); + + //leave first_digit as NaN; can check if non-NaN and lazily computed if needed later + double first_digit = std::numeric_limits::quiet_NaN(); + NormalizeStartAndEndDigitToZerosPlace(value, base, relative_to_zero, first_digit, start_digit, end_digit); + + EvaluableNodeReference digits(evaluableNodeManager->AllocNode(ENT_LIST), true); + auto &digits_ocn = digits->GetOrderedChildNodes(); + if(std::isfinite(start_digit) && std::isfinite(end_digit) && start_digit >= end_digit) + { + size_t num_digits = static_cast(std::floor(start_digit - end_digit + 1)); + digits_ocn.reserve(num_digits); + + //if doing an integer base, can be faster + if(base - std::floor(base) == 0) + { + for(double cur_digit = start_digit; cur_digit >= end_digit; cur_digit--) + { + double place_value = std::pow(base, cur_digit); + double value_shift_right = std::floor(value / place_value); + double value_digit = std::fmod(value_shift_right, base); + digits_ocn.emplace_back(evaluableNodeManager->AllocNode(value_digit)); + } + } + else //fractional base, need special logic + { + //need to compute first digits even if they're not used, so they can be subtracted from the number + // this incurs extra performance and may reduce numerical accuracy slightly (hence not used for integer bases) + if(FastIsNaN(first_digit)) + first_digit = std::floor(std::log(value) / std::log(base)); + + //need to always start at most significant digit: + for(double cur_digit = std::max(first_digit, start_digit); cur_digit >= end_digit; cur_digit--) + { + double place_value = std::pow(base, cur_digit); + double value_shift_right = std::floor(value / place_value); + double value_digit = std::fmod(value_shift_right, base); + value -= value_digit * place_value; + + if(cur_digit <= start_digit) + digits_ocn.emplace_back(evaluableNodeManager->AllocNode(value_digit)); + } + } + } + + return digits; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SET_DIGITS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + size_t num_params = ocn.size(); + if(num_params == 0) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(std::numeric_limits::quiet_NaN()), true); + + double value = InterpretNodeIntoNumberValue(ocn[0]); + if(FastIsNaN(value) || value == std::numeric_limits::infinity()) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + + double base = 10; + if(num_params > 1) + { + base = InterpretNodeIntoNumberValue(ocn[1]); + if(base <= 0) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + } + + bool relative_to_zero = true; + if(num_params > 5) + relative_to_zero = InterpretNodeIntoBoolValue(ocn[5]); + + double start_digit = (relative_to_zero ? std::numeric_limits::infinity() : 0); + if(num_params > 3) + start_digit = InterpretNodeIntoNumberValue(ocn[3]); + + double end_digit = (relative_to_zero ? 
-std::numeric_limits::infinity() : std::numeric_limits::infinity()); + if(num_params > 4) + end_digit = InterpretNodeIntoNumberValue(ocn[4]); + + EvaluableNodeReference digits; + if(num_params > 2) + digits = InterpretNodeForImmediateUse(ocn[2]); + + if(digits == nullptr || digits->GetType() != ENT_LIST) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); + + bool negative = (value < 0); + if(negative) + value = -value; + //value to modify + double result_value = value; + + //leave first_digit as NaN; can check if non-NaN and lazily computed if needed later + double first_digit = std::numeric_limits::quiet_NaN(); + NormalizeStartAndEndDigitToZerosPlace(value, base, relative_to_zero, first_digit, start_digit, end_digit); + + auto &digits_ocn = digits->GetOrderedChildNodes(); + size_t cur_digit_index = 0; + if(std::isfinite(start_digit) && std::isfinite(end_digit) && start_digit >= end_digit) + { + //if doing an integer base, can be faster + if(base - std::floor(base) == 0) + { + for(double cur_digit = start_digit; cur_digit >= end_digit; cur_digit--) + { + double place_value = std::pow(base, cur_digit); + double value_shift_right = std::floor(value / place_value); + double value_digit = std::fmod(value_shift_right, base); + + if(cur_digit_index >= digits_ocn.size()) + break; + double new_digit = EvaluableNode::ToNumber(digits_ocn[cur_digit_index++]); + + result_value -= value_digit * place_value; + result_value += new_digit * place_value; + } + } + else //fractional base, need special logic + { + //need to compute first digits even if they're not used, so they can be subtracted from the number + // this incurs extra performance and may reduce numerical accuracy slightly (hence not used for integer bases) + if(FastIsNaN(first_digit)) + first_digit = std::floor(std::log(value) / std::log(base)); + + //need to always start at most significant digit: + for(double cur_digit = std::max(first_digit, start_digit); cur_digit >= end_digit; cur_digit--) + { + double place_value = std::pow(base, cur_digit); + double value_shift_right = std::floor(value / place_value); + double value_digit = std::fmod(value_shift_right, base); + value -= value_digit * place_value; + + if(cur_digit <= start_digit) + { + if(cur_digit_index >= digits_ocn.size()) + break; + double new_digit = EvaluableNode::ToNumber(digits_ocn[cur_digit_index++]); + + result_value -= value_digit * place_value; + result_value += new_digit * place_value; + } + } + } + } + + if(negative) + result_value = -result_value; + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(result_value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_FLOOR(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::floor(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CEILING(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::ceil(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ROUND(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t 
num_params = ocn.size(); + if(num_params == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + double number_value = retval->GetNumberValue(); + + if(num_params == 1) + { + //just round to the nearest integer + retval->SetNumberValue(std::round(number_value)); + } + else + { + auto node_stack = CreateInterpreterNodeStackStateSaver(retval); + + //round to the specified number of significant digits or the specified number of digits after the decimal place, whichever is larger + double num_significant_digits = InterpretNodeIntoNumberValue(ocn[1]); + + //assume don't want any digits after decimal (this will be ignored with negitive infinity) + double num_digits_after_decimal = std::numeric_limits::infinity(); + if(num_params > 2) + num_digits_after_decimal = InterpretNodeIntoNumberValue(ocn[2]); + + if(number_value != 0.0) + { + double starting_significant_digit = std::ceil(std::log10(std::fabs(number_value))); + + //decimal digits take priority over significant digits if they are specified + num_significant_digits = std::min(starting_significant_digit + num_digits_after_decimal, num_significant_digits); + + double factor = std::pow(10.0, num_significant_digits - starting_significant_digit); + retval->SetNumberValue(std::round(number_value * factor) / factor); + } + } + + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_EXPONENT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::exp(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LOG(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + double value = InterpretNodeIntoNumberValue(ocn[0]); + double log_value = log(value); + + if(ocn.size() > 1) //base is specified, need to scale + { + double log_base = InterpretNodeIntoNumberValue(ocn[1]); + log_value /= log(log_base); + } + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(log_value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SIN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::sin(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASIN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::asin(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_COS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::cos(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference 
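The significant-digit branch of round scales by a power of ten anchored at the number's leading digit, rounds, and scales back, and an explicit decimal-place limit tightens the significant-digit budget when both are supplied. The same arithmetic as a standalone sketch (helper name is illustrative):

#include <algorithm>
#include <cmath>

//round to at most num_significant_digits, further capped by digits allowed after the decimal point
static double roundToDigits(double value, double num_significant_digits, double num_digits_after_decimal)
{
	if(value == 0.0)
		return 0.0;

	double starting_significant_digit = std::ceil(std::log10(std::fabs(value)));
	num_significant_digits = std::min(starting_significant_digit + num_digits_after_decimal, num_significant_digits);

	double factor = std::pow(10.0, num_significant_digits - starting_significant_digit);
	return std::round(value * factor) / factor;
}

With four significant digits and no decimal-place cap, 123.456 rounds to 123.5; capping at zero digits after the decimal point gives 123.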
Interpreter::InterpretNode_ENT_ACOS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::acos(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TAN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::tan(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ATAN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + if(ocn.size() == 1) + { + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::atan(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); + } + else if(ocn.size() >= 2) + { + double f1 = InterpretNodeIntoNumberValue(ocn[0]); + double f2 = InterpretNodeIntoNumberValue(ocn[1]); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(std::atan2(f1, f2)), true); + } + return EvaluableNodeReference::Null(); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SINH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::sinh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASINH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::asinh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_COSH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::cosh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ACOSH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::acosh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TANH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::tanh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ATANH(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return 
EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::atanh(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ERF(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::erf(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_TGAMMA(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::tgamma(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_LGAMMA(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::lgamma(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SQRT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::sqrt(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_POW(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + double f1 = InterpretNodeIntoNumberValue(ocn[0]); + double f2 = InterpretNodeIntoNumberValue(ocn[1]); + return EvaluableNodeReference(evaluableNodeManager->AllocNode(std::pow(f1, f2)), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ABS(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNode *retval = InterpretNodeIntoUniqueNumberValueEvaluableNode(ocn[0]); + retval->SetNumberValue(std::abs(retval->GetNumberValue())); + return EvaluableNodeReference(retval, true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MAX(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference result = EvaluableNodeReference::Null(); + double result_value = -std::numeric_limits::infinity(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(size_t i = 0; i < interpreted_nodes.size(); i++) + { + //do the comparison and keep the greater + double cur_value = EvaluableNode::ToNumber(interpreted_nodes[i]); + if(cur_value > result_value) + { + result = interpreted_nodes[i]; + result_value = cur_value; + } + } + + return result; + } +#endif + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + for(auto &cn : ocn) + { + auto cur = InterpretNodeForImmediateUse(cn); + if(cur == nullptr) + continue; + + double cur_value = EvaluableNode::ToNumber(cur); + 
if(FastIsNaN(cur_value)) + continue; + + //if haven't gotten a result yet, then use this as the first data + if(result == nullptr) + { + node_stack.PushEvaluableNode(cur); + + result = cur; + result_value = cur_value; + continue; + } + + //do the comparison and keep the greater + if(cur_value > result_value) + { + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(cur); + + //replace previous result with cur + evaluableNodeManager->FreeNodeTreeIfPossible(result); + result = cur; + result_value = cur_value; + } + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MIN(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference result = EvaluableNodeReference::Null(); + double result_value = std::numeric_limits::infinity(); + +#ifdef MULTITHREAD_SUPPORT + std::vector interpreted_nodes; + if(InterpretEvaluableNodesConcurrently(en, ocn, interpreted_nodes)) + { + for(size_t i = 0; i < interpreted_nodes.size(); i++) + { + //do the comparison and keep the greater + double cur_value = EvaluableNode::ToNumber(interpreted_nodes[i]); + if(cur_value < result_value) + { + result = interpreted_nodes[i]; + result_value = cur_value; + } + } + + return result; + } +#endif + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + for(auto &cn : ocn) + { + auto cur = InterpretNodeForImmediateUse(cn); + if(cur == nullptr) + continue; + + double cur_value = EvaluableNode::ToNumber(cur); + if(FastIsNaN(cur_value)) + continue; + + //if haven't gotten a result yet, then use this as the first data + if(result == nullptr) + { + node_stack.PushEvaluableNode(cur); + + result = cur; + result_value = cur_value; + continue; + } + + //do the comparison and keep the lesser + if(cur_value < result_value) + { + node_stack.PopEvaluableNode(); + node_stack.PushEvaluableNode(cur); + + //replace previous result with cur + evaluableNodeManager->FreeNodeTreeIfPossible(result); + result = cur; + result_value = cur_value; + } + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_DOT_PRODUCT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + if(ocn.size() < 2) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(0.0), true); + + EvaluableNodeReference elements1 = InterpretNodeForImmediateUse(ocn[0]); + if(EvaluableNode::IsNull(elements1)) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(0.0), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(elements1); + EvaluableNodeReference elements2 = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PopEvaluableNode(); + + if(EvaluableNode::IsNull(elements2)) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(0.0), true); + + bool elements1_assoc = elements1->IsAssociativeArray(); + bool elements2_assoc = elements2->IsAssociativeArray(); + + double dot_product = 0.0; + + if(!elements1_assoc && !elements2_assoc) + { + auto &ocn1 = elements1->GetOrderedChildNodes(); + auto &ocn2 = elements2->GetOrderedChildNodes(); + + size_t num_elements = std::min(ocn1.size(), ocn2.size()); + for(size_t i = 0; i < num_elements; i++) + dot_product += EvaluableNode::ToNumber(ocn1[i]) * EvaluableNode::ToNumber(ocn2[i]); + } + else //at least one is an assoc + { + //if not an assoc, then convert + if(!elements1_assoc) + { + if(!elements1.unique) + elements1.reference = evaluableNodeManager->AllocNode(elements1); + elements1->ConvertOrderedListToNumberedAssoc(); + } + + 
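dot_product accepts both positional (list) and keyed (assoc) operands; in the keyed branch that follows, keys missing from either side simply contribute zero to the sum. Both behaviors as small stand-ins (std::vector and std::map in place of the node types; names are illustrative):

#include <algorithm>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

//positional dot product over the shorter of the two lists
static double dotProduct(const std::vector<double> &a, const std::vector<double> &b)
{
	double sum = 0.0;
	std::size_t n = std::min(a.size(), b.size());
	for(std::size_t i = 0; i < n; i++)
		sum += a[i] * b[i];
	return sum;
}

//keyed dot product: only keys present in both maps contribute
static double dotProduct(const std::map<std::string, double> &a, const std::map<std::string, double> &b)
{
	double sum = 0.0;
	for(auto &[key, value] : a)
	{
		auto found = b.find(key);
		if(found != end(b))
			sum += value * found->second;
	}
	return sum;
}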
if(!elements2_assoc) + { + if(!elements2.unique) + elements2.reference = evaluableNodeManager->AllocNode(elements2); + elements2->ConvertOrderedListToNumberedAssoc(); + } + + auto &mcn1 = elements1->GetMappedChildNodes(); + auto &mcn2 = elements2->GetMappedChildNodes(); + + for(auto &[node1_id, node1] : mcn1) + { + //if a key isn't in both, then its value is zero + auto node2 = mcn2.find(node1_id); + if(node2 == end(mcn2)) + continue; + + dot_product += EvaluableNode::ToNumber(node1) * EvaluableNode::ToNumber(node2->second); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(elements1); + evaluableNodeManager->FreeNodeTreeIfPossible(elements2); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(dot_product), true); +} + +//builds a vector of the values in the node, using ordered or mapped child nodes as appropriate +// if node is mapped child nodes, it will use id_order to order populate out and use default_value if any given id is not found +inline void GetChildNodesAsENImmediateValueArray(EvaluableNode *node, std::vector &id_order, + std::vector &out, std::vector &out_types) +{ + if(node != nullptr) + { + if(node->IsAssociativeArray()) + { + auto &wn_mcn = node->GetMappedChildNodesReference(); + out.resize(id_order.size()); + out_types.resize(id_order.size()); + for(size_t i = 0; i < id_order.size(); i++) + { + auto found_node = wn_mcn.find(id_order[i]); + if(found_node != end(wn_mcn)) + { + out_types[i] = out[i].CopyValueFromEvaluableNode(found_node->second); + } + else //not found, use default + { + out[i] = EvaluableNodeImmediateValue(0.0); + out_types[i] = ENIVT_NUMBER; + } + } + } + else if(node->IsImmediate()) + { + //fill in with the node's value + EvaluableNodeImmediateValue value; + EvaluableNodeImmediateValueType value_type = value.CopyValueFromEvaluableNode(node); + out.clear(); + out_types.clear(); + out.resize(id_order.size(), value); + out_types.resize(id_order.size(), value_type); + } + else //must be ordered + { + auto &node_ocn = node->GetOrderedChildNodesReference(); + + out.resize(node_ocn.size()); + out_types.resize(node_ocn.size()); + for(size_t i = 0; i < node_ocn.size(); i++) + out_types[i] = out[i].CopyValueFromEvaluableNode(node_ocn[i]); + } + } +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_GENERALIZED_DISTANCE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 6) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + //get weights list if applicable + auto weights_node = InterpretNodeForImmediateUse(ocn[0]); + if(!EvaluableNode::IsNull(weights_node)) + node_stack.PushEvaluableNode(weights_node); + + //get distance types if applicable + auto distance_types_node = InterpretNodeForImmediateUse(ocn[1]); + if(!EvaluableNode::IsNull(distance_types_node)) + node_stack.PushEvaluableNode(distance_types_node); + + //get feature attributes if applicable + auto attributes_node = InterpretNodeForImmediateUse(ocn[2]); + if(!EvaluableNode::IsNull(attributes_node)) + node_stack.PushEvaluableNode(attributes_node); + + //get deviations if applicable + auto deviations_node = InterpretNodeForImmediateUse(ocn[3]); + if(!EvaluableNode::IsNull(deviations_node)) + node_stack.PushEvaluableNode(deviations_node); + + GeneralizedDistance dist_params; + + dist_params.pValue = InterpretNodeIntoNumberValue(ocn[4]); + + //get location + auto location_node = InterpretNodeForImmediateUse(ocn[5]); + if(!EvaluableNode::IsNull(location_node)) + 
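generalized_distance ultimately evaluates a weighted Minkowski distance with parameter p (dist_params.pValue) between the location and origin vectors. Setting aside the per-feature type, deviation, and unknown-value handling done by GeneralizedDistance, the core quantity for finite positive p is sketched below as a simplified stand-in, not the project's implementation:

#include <cmath>
#include <cstddef>
#include <vector>

//weighted Minkowski distance with parameter p; missing weights default to 1
static double minkowskiDistance(const std::vector<double> &x, const std::vector<double> &y,
	const std::vector<double> &weights, double p)
{
	double sum = 0.0;
	for(std::size_t i = 0; i < x.size() && i < y.size(); i++)
	{
		double w = (i < weights.size() ? weights[i] : 1.0);
		sum += w * std::pow(std::fabs(x[i] - y[i]), p);
	}
	return std::pow(sum, 1.0 / p);
}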
node_stack.PushEvaluableNode(location_node); + + //get origin if applicable + EvaluableNodeReference origin_node = EvaluableNodeReference::Null(); + if(ocn.size() > 6) + { + origin_node = InterpretNodeForImmediateUse(ocn[6]); + if(!EvaluableNode::IsNull(origin_node)) + node_stack.PushEvaluableNode(origin_node); + } + + //get value_names if applicable + std::vector value_names; + if(ocn.size() > 8) + { + EvaluableNodeReference value_names_node = InterpretNodeForImmediateUse(ocn[8]); + if(!EvaluableNode::IsNull(value_names_node)) + { + //extract the names for each value into value_names + auto &vnn_ocn = value_names_node->GetOrderedChildNodes(); + value_names.reserve(vnn_ocn.size()); + for(auto &vn : vnn_ocn) + { + StringInternPool::StringID label_sid = EvaluableNode::ToStringIDIfExists(vn); + if(label_sid != string_intern_pool.NOT_A_STRING_ID) + value_names.push_back(label_sid); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(value_names_node); + } + + //get the origin and destination + std::vector location; + std::vector location_types; + GetChildNodesAsENImmediateValueArray(location_node, value_names, location, location_types); + + std::vector origin; + std::vector origin_types; + GetChildNodesAsENImmediateValueArray(origin_node, value_names, origin, origin_types); + + //resize everything to the proper number of elements, fill in with zeros + size_t num_elements = std::max(std::max(location.size(), origin.size()), value_names.size()); + location.resize(num_elements, 0.0); + location_types.resize(num_elements, ENIVT_NUMBER); + origin.resize(num_elements, 0.0); + origin_types.resize(num_elements, ENIVT_NUMBER); + + EntityQueryBuilder::PopulateDistanceFeatureParameters(dist_params, num_elements, value_names, + weights_node, distance_types_node, attributes_node, deviations_node); + + //done with all values + evaluableNodeManager->FreeNodeTreeIfPossible(weights_node); + evaluableNodeManager->FreeNodeTreeIfPossible(distance_types_node); + evaluableNodeManager->FreeNodeTreeIfPossible(attributes_node); + evaluableNodeManager->FreeNodeTreeIfPossible(deviations_node); + + dist_params.highAccuracy = true; + dist_params.recomputeAccurateDistances = false; + dist_params.SetAndConstrainParams(); + + //convert unknown differences into unknown distance terms + for(size_t i = 0; i < num_elements; i++) + { + auto &feature_params = dist_params.featureParams[i]; + + //if one is nan and the other is not, then use the non-nan one for both + if(FastIsNaN(feature_params.unknownToUnknownDifference)) + { + if(!FastIsNaN(feature_params.knownToUnknownDifference)) + feature_params.unknownToUnknownDifference = feature_params.knownToUnknownDifference; + else + feature_params.unknownToUnknownDifference = dist_params.GetMaximumDifference(i); + } + + if(FastIsNaN(feature_params.knownToUnknownDifference)) + feature_params.knownToUnknownDifference = feature_params.unknownToUnknownDifference; + + dist_params.ComputeAndStoreUncertaintyDistanceTerms(i); + } + + double value = dist_params.ComputeMinkowskiDistance(location, location_types, origin, origin_types); + + //free these after computation in case they had any code being used/referenced in the distance + evaluableNodeManager->FreeNodeTreeIfPossible(location_node); + evaluableNodeManager->FreeNodeTreeIfPossible(origin_node); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(value), true); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ENTROPY(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) +
return EvaluableNodeReference(evaluableNodeManager->AllocNode(0.0), true); + + //get first list of probabilities, p + bool p_is_constant = false; + double p_constant_value = 0.0; + + bool p_is_assoc = false; + size_t p_num_elements = std::numeric_limits::max(); + + //if the evaluable node for p is a list, then p_values will reference its list, + // otherwise if it is an assoc array, it will populate p_copied_values and have p_values point to it + std::vector *p_values; + std::vector p_copied_values; + + auto p_node = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(p_node); + + if(EvaluableNode::IsAssociativeArray(p_node)) + { + auto &p_node_mcn = p_node->GetMappedChildNodesReference(); + p_is_assoc = true; + p_num_elements = p_node_mcn.size(); + + p_values = &p_copied_values; + p_copied_values.reserve(p_num_elements); + for(auto &[_, ce] : p_node_mcn) + p_copied_values.push_back(ce); + } + else if(EvaluableNode::IsOrderedArray(p_node)) + { + auto &p_node_ocn = p_node->GetOrderedChildNodesReference(); + p_num_elements = p_node_ocn.size(); + p_values = &p_node_ocn; + } + else //not an assoc or list, so treat as a constant probability instead + { + p_is_constant = true; + p_constant_value = EvaluableNode::ToNumber(p_node); + } + + //exponents are affected if we have two distributions specified + bool have_q_distribution = false; + + //get second list of probabilities, q + bool q_is_constant = false; + double q_constant_value = 0.0; + + size_t q_num_elements = std::numeric_limits::max(); + + //if the evaluable node for q is a list, then q_values will reference its list, + // otherwise if it is an assoc array, it will populate q_copied_values and have q_values point to it + std::vector *q_values = nullptr; + std::vector q_copied_values; + + auto q_node = EvaluableNodeReference::Null(); + if(ocn.size() >= 2) + { + //comparison so use positive sign + have_q_distribution = true; + q_node = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PushEvaluableNode(q_node); + + if(EvaluableNode::IsAssociativeArray(q_node)) + { + q_num_elements = q_node->GetMappedChildNodes().size(); + + q_values = &q_copied_values; + + //because p is the parameter in front and if it is 0, then none of the rest of the term matters, + // we should use p's index list to populate q's values + if(p_is_assoc) + { + q_copied_values.reserve(p_num_elements); + for(auto &[pce_id, _] : p_node->GetMappedChildNodes()) + { + auto q_i = q_node->GetMappedChildNodes().find(pce_id); + if(q_i == end(q_node->GetMappedChildNodes())) + continue; + q_copied_values.push_back(q_i->second); + } + } + else if(p_is_constant) + { + q_copied_values.reserve(q_num_elements); + for(auto &[_, ce] : q_node->GetMappedChildNodes()) + q_copied_values.push_back(ce); + } + else //p must be a list + { + q_copied_values.reserve(p_num_elements); + for(size_t index = 0; index < p_num_elements; index++) + { + StringInternPool::StringID key_sid = EvaluableNode::ToStringIDIfExists((*p_values)[index]); + + EvaluableNode **found = q_node->GetMappedChildNode(key_sid); + if(found != nullptr) + q_copied_values.push_back(*found); + } + } + } + else if(EvaluableNode::IsOrderedArray(q_node)) + { + q_num_elements = q_node->GetOrderedChildNodes().size(); + q_values = &q_node->GetOrderedChildNodes(); + } + else //not an assoc or list, so treat as a constant probability instead + { + q_is_constant = true; + q_constant_value = EvaluableNode::ToNumber(q_node); + } + } + + //if both are constants, then have no entropy (no probability 
mass), so return 0 + if((p_is_constant || p_num_elements == std::numeric_limits::max()) + && (q_is_constant || q_num_elements == std::numeric_limits::max())) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(0.0), true); + + //now that have the size of both p and q, can compute constant values if applicable + //if p_node is null then compute a constant value + if(EvaluableNode::IsNull(p_node)) + { + p_is_constant = true; + p_constant_value = 1.0 / q_num_elements; + } + + //if q_node is null then compute a constant value + if(EvaluableNode::IsNull(q_node)) + { + q_is_constant = true; + q_constant_value = 1.0 / p_num_elements; + } + + //get optional exponent parameters + double p_exponent = 1; + //if have a second distribution, then default to kl divergence, with each term q_i/p_i + if(have_q_distribution) + p_exponent = -1; + if(ocn.size() >= 3) + p_exponent = InterpretNodeIntoNumberValue(ocn[2]); + + //if exponent is 0, then all values will be 1 + if(p_exponent == 0) + { + p_is_constant = true; + p_constant_value = 1; + } + + double q_exponent = 0; + //default to KL divergence with each term of q_i/p_i + if(ocn.size() >= 2) + q_exponent = 1; + //override if specified + if(ocn.size() >= 4) + q_exponent = InterpretNodeIntoNumberValue(ocn[3]); + + //if exponent is 0, then all values will be 1 + if(q_exponent == 0) + { + q_is_constant = true; + q_constant_value = 1; + } + + //finally can compute entropy + size_t num_elements = std::min(p_num_elements, q_num_elements); + double accumulated_entropy = 0.0; + + for(size_t i = 0; i < num_elements; i++) + { + //get the original p_i value to multiply out in front + double p_i_first_term; + if(p_is_constant) + p_i_first_term = p_constant_value; + else + p_i_first_term = EvaluableNode::ToNumber((*p_values)[i]); + + //in entropy calculations, always exit early if p_i is 0 even if the subsequent terms blow up + if(p_i_first_term <= 0) + continue; + + //exponentiate p_i if applicable (note that exponent of 0 is covered earlier in the code) + double p_i_exponentiated = p_i_first_term; + if(p_exponent == -1) + p_i_exponentiated = 1 / p_i_exponentiated; + else if(p_exponent != 1) + p_i_exponentiated = std::pow(p_i_exponentiated, p_exponent); + + double q_i; + if(q_is_constant) + q_i = q_constant_value; + else + q_i = EvaluableNode::ToNumber((*q_values)[i]); + + //exponentiate q_i if applicable (note that exponent of 0 is covered earlier in the code) + if(q_exponent == 0) + q_i = 1; + else if(q_exponent == -1) + q_i = 1 / q_i; + else if(q_exponent != 1) + q_i = std::pow(q_i, q_exponent); + + accumulated_entropy += p_i_first_term * std::log(p_i_exponentiated * q_i); + } + + //clean up + node_stack.PopEvaluableNode(); + evaluableNodeManager->FreeNodeTreeIfPossible(p_node); + evaluableNodeManager->FreeNodeTreeIfPossible(q_node); + + //negate + accumulated_entropy = -accumulated_entropy; + + //in rare cases where the values in either p or q may not add up exactly to 1 due to floating point precision, and where the values in q +are larger than the values in p, the resulting value may wind up being a tiny negative, but since information gain cannot be negative, +we take the max of the result and 0 + accumulated_entropy = std::max(0.0, accumulated_entropy); + + return EvaluableNodeReference(evaluableNodeManager->AllocNode(accumulated_entropy), true); +} diff --git a/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp b/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp new file mode 100644 index 00000000..22ed5d4a --- 
/dev/null +++ b/src/Amalgam/interpreter/InterpreterOpcodesTransformations.cpp @@ -0,0 +1,1726 @@ +//project headers: +#include "Interpreter.h" + +#include "AmalgamVersion.h" +#include "AssetManager.h" +#include "EntityManipulation.h" +#include "EntityQueries.h" +#include "EntityQueryManager.h" +#include "EvaluableNodeTreeFunctions.h" +#include "EvaluableNodeTreeManipulation.h" +#include "EvaluableNodeTreeDifference.h" +#include "PerformanceProfiler.h" + +//system headers: +#include +#include +#include +#include +#include +#include +#include + +EvaluableNodeReference Interpreter::InterpretNode_ENT_REWRITE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto function = InterpretNodeForImmediateUse(ocn[0]); + if(function == nullptr) + return EvaluableNodeReference::Null(); + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + //get tree and make a copy so it can be modified in-place + auto to_modify = InterpretNode(ocn[1]); + if(to_modify == nullptr) + return EvaluableNodeReference::Null(); + + if(!to_modify.unique) + to_modify = evaluableNodeManager->DeepAllocCopy(to_modify); + node_stack.PushEvaluableNode(to_modify); + + //apply rewrite function + //pass value of list to be mapped + PushNewConstructionContext(to_modify, nullptr, EvaluableNodeImmediateValueWithType(), to_modify); + + EvaluableNode::ReferenceSetType references; + EvaluableNode *result = RewriteByFunction(function, to_modify, to_modify, references); + + PopConstructionContext(); + + EvaluableNodeManager::UpdateFlagsForNodeTree(result, references); + + return EvaluableNodeReference(result, false); //can't make any guarantees about the new code +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_MAP(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto function = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + EvaluableNodeReference result = EvaluableNodeReference::Null(); + + if(ocn.size() == 2) + { + //get list + auto list = InterpretNode(ocn[1]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + //if it's the only reference of the list (and it doesn't refer back to itself), then just reuse it for the output + if(list.unique && !list->GetNeedCycleCheck()) + result = list; + else //the list is used elsewhere, so need to create a new one + result = EvaluableNodeReference(evaluableNodeManager->AllocNode(list), true); //starts out cycle free unless attach something cyclic or not unique + + if(list->IsOrderedArray()) + { + auto &list_ocn = list->GetOrderedChildNodesReference(); + + #ifdef MULTITHREAD_SUPPORT + size_t num_nodes = list_ocn.size(); + if(en->GetConcurrency() && num_nodes > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + node_stack.PushEvaluableNode(list); + node_stack.PushEvaluableNode(result); + + ConcurrencyManager concurrency_manager(this, num_nodes); + + for(size_t node_index = 0; node_index < num_nodes; node_index++) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(function, + list, result, EvaluableNodeImmediateValueWithType(static_cast(node_index)), list_ocn[node_index]); + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + //filter by those child nodes that are true + auto evaluations = 
concurrency_manager.GetResultsAndFreeReferences(); + auto &result_ocn = result->GetOrderedChildNodes(); + for(size_t i = 0; i < num_nodes; i++) + { + result_ocn[i] = evaluations[i]; + result.UpdatePropertiesBasedOnAttachedNode(evaluations[i]); + } + + return result; + } + } + #endif + + PushNewConstructionContext(list, result, EvaluableNodeImmediateValueWithType(0.0), nullptr); + + auto &result_ocn = result->GetOrderedChildNodesReference(); + for(size_t i = 0; i < list_ocn.size(); i++) + { + //pass value of list to be mapped + SetTopTargetValueIndexInConstructionStack(static_cast(i)); + SetTopTargetValueReferenceInConstructionStack(list_ocn[i]); + + EvaluableNodeReference element_result = InterpretNode(function); + result_ocn[i] = element_result; + result.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + } + else if(list->IsAssociativeArray()) + { + //result_mcn is either the same as list_mcn or a copy of it + auto &result_mcn = result->GetMappedChildNodesReference(); + + #ifdef MULTITHREAD_SUPPORT + size_t num_nodes = result_mcn.size(); + if(en->GetConcurrency() && num_nodes > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + node_stack.PushEvaluableNode(list); + node_stack.PushEvaluableNode(result); + + ConcurrencyManager concurrency_manager(this, num_nodes); + + for(auto &[node_id, node] : result_mcn) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(function, + list, result, EvaluableNodeImmediateValueWithType(node_id), node); + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + //filter by those child nodes that are true + auto evaluations = concurrency_manager.GetResultsAndFreeReferences(); + size_t node_index = 0; + for(auto &[cn_id, cn] : result_mcn) + { + cn = evaluations[node_index]; + result.UpdatePropertiesBasedOnAttachedNode(evaluations[node_index]); + node_index++; + } + + return result; + } + } + #endif + + PushNewConstructionContext(list, result, EvaluableNodeImmediateValueWithType(StringInternPool::NOT_A_STRING_ID), nullptr); + + for(auto &[cn_id, cn] : result_mcn) + { + SetTopTargetValueIndexInConstructionStack(cn_id); + SetTopTargetValueReferenceInConstructionStack(cn); + + EvaluableNodeReference element_result = InterpretNode(function); + cn = element_result; + result.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + } + } + else //multiple inputs + { + EvaluableNode *inputs_list_node = evaluableNodeManager->AllocNode(ENT_LIST); + inputs_list_node->SetOrderedChildNodesSize(ocn.size() - 1); + auto &inputs = inputs_list_node->GetOrderedChildNodes(); + + //process inputs, get size and whether needs to be associative array + bool need_assoc = false; + + //note that all_keys will maintain references to each StringID that must be freed + FastHashSet all_keys; //only if have assoc + size_t largest_size = 0; //only if have list + + node_stack.PushEvaluableNode(inputs_list_node); + for(size_t i = 0; i < ocn.size() - 1; i++) + { + inputs[i] = InterpretNode(ocn[i + 1]); + if(inputs[i] != nullptr) + { + if(!inputs[i]->IsAssociativeArray()) + { + largest_size = std::max(largest_size, inputs[i]->GetOrderedChildNodes().size()); + } + else + { + need_assoc = true; + for(auto &[n_id, _] : inputs[i]->GetMappedChildNodes()) + { + auto [inserted_node, inserted] = all_keys.insert(n_id); + //if it was inserted, then need to keep track of the string reference + if(inserted) + 
string_intern_pool.CreateStringReference(n_id); + } + } + } + } + node_stack.PopEvaluableNode(); + + if(!need_assoc) + { + result = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + result->GetOrderedChildNodes().resize(largest_size); + + PushNewConstructionContext(inputs_list_node, result, EvaluableNodeImmediateValueWithType(0.0), nullptr); + + for(size_t index = 0; index < largest_size; index++) + { + //set index value + SetTopTargetValueIndexInConstructionStack(static_cast(index)); + + //combine input slices together into value + EvaluableNode *input_slice = evaluableNodeManager->AllocNode(ENT_LIST); + auto &is_ocn = input_slice->GetOrderedChildNodes(); + is_ocn.resize(inputs.size()); + for(size_t i = 0; i < inputs.size(); i++) + { + if(inputs[i] == nullptr || index >= inputs[i]->GetOrderedChildNodes().size()) + { + is_ocn[i] = nullptr; + continue; + } + is_ocn[i] = inputs[i]->GetOrderedChildNodes()[index]; + } + SetTopTargetValueReferenceInConstructionStack(input_slice); + + EvaluableNodeReference element_result = InterpretNode(function); + result->GetOrderedChildNodes()[index] = element_result; + result.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + } + else //need associative array + { + result = EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + result->ReserveMappedChildNodes(largest_size + all_keys.size()); + + PushNewConstructionContext(inputs_list_node, result, EvaluableNodeImmediateValueWithType(0.0), nullptr); + + //do any numbers from lists first + for(size_t index = 0; index < largest_size; index++) + { + //set index value + SetTopTargetValueIndexInConstructionStack(static_cast(index)); + + //combine input slices together into value + EvaluableNode *input_slice = evaluableNodeManager->AllocNode(ENT_LIST); + auto &is_ocn = input_slice->GetOrderedChildNodes(); + is_ocn.resize(inputs.size()); + for(size_t i = 0; i < inputs.size(); i++) + { + if(inputs[i] == nullptr) + { + is_ocn[i] = nullptr; + } + else if(inputs[i]->IsAssociativeArray()) + { + const std::string index_string = EvaluableNode::NumberToString(index); + EvaluableNode **found = inputs[i]->GetMappedChildNode(index_string); + if(found != nullptr) + is_ocn[i] = *found; + } + else //list + { + if(index < inputs[i]->GetOrderedChildNodes().size()) + is_ocn[i] = inputs[i]->GetOrderedChildNodes()[index]; + } + } + SetTopTargetValueReferenceInConstructionStack(input_slice); + + EvaluableNodeReference element_result = InterpretNode(function); + std::string index_string = EvaluableNode::NumberToString(index); + result->SetMappedChildNode(index_string, element_result); + + result.UpdatePropertiesBasedOnAttachedNode(element_result); + + //remove from keys so it isn't clobbered when checking assoc keys + StringInternPool::StringID index_sid = string_intern_pool.GetIDFromString(index_string); + if(all_keys.erase(index_sid)) + string_intern_pool.DestroyStringReference(index_sid); + } + + //now perform for all assocs + for(auto &index_sid : all_keys) + { + //set index value + SetTopTargetValueIndexInConstructionStack(index_sid); + + //combine input slices together into value + EvaluableNode *input_slice = evaluableNodeManager->AllocNode(ENT_LIST); + auto &is_ocn = input_slice->GetOrderedChildNodesReference(); + is_ocn.resize(inputs.size()); + for(size_t i = 0; i < inputs.size(); i++) + { + //dealt with lists previously, only assoc in this pass + if(!EvaluableNode::IsAssociativeArray(inputs[i])) + is_ocn[i] = nullptr; + else + { + 
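+	//this input is an assoc, so take its value for the remaining assoc-only key; inputs
+	//missing the key (and the plain lists handled in the pass above) leave a null slot so
+	//the mapped function still sees one argument per input, e.g. mapping over a two-element
+	//list and the assoc {"x": 3} roughly produces result keys "0", "1", and "x"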
EvaluableNode **found = inputs[i]->GetMappedChildNode(index_sid); + if(found != nullptr) + is_ocn[i] = *found; + } + } + SetTopTargetValueReferenceInConstructionStack(input_slice); + + EvaluableNodeReference element_result = InterpretNode(function); + result->SetMappedChildNode(index_sid, element_result); + result.UpdatePropertiesBasedOnAttachedNode(element_result); + } + + PopConstructionContext(); + + } //needed to process as assoc array + + //free all references + string_intern_pool.DestroyStringReferences(all_keys); + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_FILTER(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() == 0) + return EvaluableNodeReference::Null(); + + if(ocn.size() == 1) + { + //get list + auto list = InterpretNode(ocn[0]); + if(list == nullptr) + { + evaluableNodeManager->FreeNodeTreeIfPossible(list); + return EvaluableNodeReference::Null(); + } + + EvaluableNodeReference result_list(list, list.unique); + + //need to edit the list itself, so if not unique, make at least the top node unique + if(!result_list.unique) + result_list.reference = evaluableNodeManager->AllocNode(list); + + if(result_list->IsAssociativeArray()) + { + auto &result_list_mcn = result_list->GetMappedChildNodesReference(); + + std::vector ids_to_remove; + for(auto &[cn_id, cn] : result_list_mcn) + { + if(EvaluableNode::IsEmptyNode(cn)) + ids_to_remove.push_back(cn_id); + } + + string_intern_pool.DestroyStringReferences(ids_to_remove); + if(result_list.unique && !result_list->GetNeedCycleCheck()) + { + //FreeNodeTree and erase the key + for(auto &id : ids_to_remove) + { + auto pair = result_list_mcn.find(id); + evaluableNodeManager->FreeNodeTree(pair->second); + result_list_mcn.erase(pair); + } + } + else //can't safely delete any nodes + { + for(auto &id : ids_to_remove) + result_list_mcn.erase(id); + } + } + else if(result_list->IsOrderedArray()) + { + auto &result_list_ocn = result_list->GetOrderedChildNodesReference(); + + if(result_list.unique && !result_list->GetNeedCycleCheck()) + { + //for any nodes to be erased, FreeNodeTree and erase the index + for(size_t i = result_list_ocn.size(); i > 0; i--) + { + size_t index = i - 1; + if(!EvaluableNode::IsEmptyNode(result_list_ocn[index])) + continue; + + evaluableNodeManager->FreeNodeTree(result_list_ocn[index]); + result_list_ocn.erase(begin(result_list_ocn) + index); + } + } + else //can't safely delete any nodes + { + auto new_end = std::remove_if(begin(result_list_ocn), end(result_list_ocn), + [](EvaluableNode *en) { return EvaluableNode::IsEmptyNode(en); }); + result_list_ocn.erase(new_end, end(result_list_ocn)); + } + } + + return result_list; + } + + auto function = InterpretNodeForImmediateUse(ocn[0]); + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + //get list + auto list = InterpretNode(ocn[1]); + //if null, just return a new null, since it has no child nodes + if(list == nullptr) + return EvaluableNodeReference::Null(); + + //create result_list as a copy of the current list, but clear out child nodes + EvaluableNodeReference result_list(evaluableNodeManager->AllocNode(list->GetType()), list.unique); + + if(EvaluableNode::IsNull(function)) + return result_list; + + if(list->GetOrderedChildNodes().size() > 0) + { + auto &list_ocn = list->GetOrderedChildNodes(); + auto &result_ocn = result_list->GetOrderedChildNodes(); + + #ifdef MULTITHREAD_SUPPORT + size_t num_nodes = list_ocn.size(); + if(en->GetConcurrency() && num_nodes > 1) + { + auto 
enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + node_stack.PushEvaluableNode(list); + node_stack.PushEvaluableNode(result_list); + + ConcurrencyManager concurrency_manager(this, num_nodes); + + for(size_t node_index = 0; node_index < num_nodes; node_index++) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(function, + list, result_list, EvaluableNodeImmediateValueWithType(static_cast(node_index)), list_ocn[node_index]); + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + //filter by those child nodes that are true + auto evaluations = concurrency_manager.GetResultsAndFreeReferences(); + for(size_t i = 0; i < num_nodes; i++) + { + if(EvaluableNode::IsTrue(evaluations[i])) + result_ocn.push_back(list_ocn[i]); + + evaluableNodeManager->FreeNodeTreeIfPossible(evaluations[i]); + } + } + } + else +#endif + //need this in a block for multithreading above + { + PushNewConstructionContext(list, result_list, EvaluableNodeImmediateValueWithType(0.0), nullptr); + + //iterate over all child nodes + for(size_t i = 0; i < list_ocn.size(); i++) + { + EvaluableNode *cur_value = list_ocn[i]; + + SetTopTargetValueIndexInConstructionStack(static_cast(i)); + SetTopTargetValueReferenceInConstructionStack(cur_value); + + //check current element + if(InterpretNodeIntoBoolValue(function)) + result_ocn.push_back(cur_value); + } + + PopConstructionContext(); + + //free anything not in filtered list + // need to do this outside of the iteration loop in case anything is accessing the original list + if(list.unique && !list->GetNeedCycleCheck()) + { + size_t result_index = 0; + for(size_t i = 0; i < list_ocn.size(); i++) + { + //if there are still results left, check if it matches + if(result_index < result_ocn.size() && list_ocn[i] == result_ocn[result_index]) + result_index++; + else //free it + evaluableNodeManager->FreeNodeTree(list_ocn[i]); + } + } + } + + evaluableNodeManager->FreeNodeIfPossible(list); + return result_list; + } + + if(list->IsAssociativeArray()) + { + auto &list_mcn = list->GetMappedChildNodesReference(); + + #ifdef MULTITHREAD_SUPPORT + size_t num_nodes = list_mcn.size(); + if(en->GetConcurrency() && num_nodes > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + node_stack.PushEvaluableNode(list); + node_stack.PushEvaluableNode(result_list); + + ConcurrencyManager concurrency_manager(this, num_nodes); + + //kick off interpreters + for(auto &[node_id, node] : list_mcn) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(function, + list, result_list, EvaluableNodeImmediateValueWithType(node_id), node); + + enqueue_task_lock.Unlock(); + + concurrency_manager.EndConcurrency(); + + //filter by those child nodes that are true + auto evaluations = concurrency_manager.GetResultsAndFreeReferences(); + + //iterate in same order with same node_index + size_t node_index = 0; + for(auto &[node_id, node] : list_mcn) + { + if(EvaluableNode::IsTrue(evaluations[node_index])) + result_list->SetMappedChildNode(node_id, node); + + evaluableNodeManager->FreeNodeTreeIfPossible(evaluations[node_index]); + + node_index++; + } + + node_stack.PopEvaluableNode(); + node_stack.PopEvaluableNode(); + evaluableNodeManager->FreeNodeIfPossible(list); + return result_list; + } + } + #endif + + PushNewConstructionContext(list, result_list, EvaluableNodeImmediateValueWithType(StringInternPool::NOT_A_STRING_ID), 
nullptr); + + //result_list is a copy of list, so it should already be the same size (no need to reserve) + for(auto &[cn_id, cn] : list_mcn) + { + SetTopTargetValueIndexInConstructionStack(cn_id); + SetTopTargetValueReferenceInConstructionStack(cn); + + //if contained, add to result_list (and let SetMappedChildNode create the string reference) + if(InterpretNodeIntoBoolValue(function)) + result_list->SetMappedChildNode(cn_id, cn); + } + + PopConstructionContext(); + } + + evaluableNodeManager->FreeNodeIfPossible(list); + return result_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_WEAVE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t num_params = ocn.size(); + if(num_params < 1) + return EvaluableNodeReference::Null(); + + //single list, return itself + if(ocn.size() == 1) + return InterpretNode(ocn[0]); + + //get the index of the first list to weave based on how many parameters there are + size_t index_of_first_list = 0; + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + //if a function is specified, then set up appropriate data structures to call the function and move the indices for the index and value parameters + EvaluableNodeReference function = EvaluableNodeReference::Null(); + if(num_params >= 3) + { + index_of_first_list++; + + //need to interpret node here in case function is actually a null + // null is a special non-function for weave + function = InterpretNode(ocn[0]); + node_stack.PushEvaluableNode(function); + } + + //interpret all the lists, need to keep those around that are nulls because it ensures that the nulls should be interleaved + // when a function is not passed in and it ensures that index of the parameters matches the index of the _ variable + std::vector lists(num_params - index_of_first_list); + for(size_t list_index = index_of_first_list; list_index < num_params; list_index++) + { + lists[list_index - index_of_first_list] = InterpretNode(ocn[list_index]); + node_stack.PushEvaluableNode(lists[list_index - index_of_first_list]); + } + + //find the largest of all the lists and the total number of elements + size_t maximum_list_size = 0; + size_t total_num_elements = 0; + for(auto &list : lists) + { + if(list != nullptr) + { + size_t num_elements = list->GetOrderedChildNodes().size(); + maximum_list_size = std::max(maximum_list_size, num_elements); + total_num_elements += num_elements; + } + } + + //the result + EvaluableNodeReference woven_list(evaluableNodeManager->AllocNode(ENT_LIST), true); + + //just lists, interleave + if(function == nullptr) + { + woven_list->ReserveOrderedChildNodes(total_num_elements); + + //for every index, iterate over every list and if there is an element, put it in the woven list + for(size_t list_index = 0; list_index < maximum_list_size; list_index++) + { + for(auto &list : lists) + { + //if immediate, then write out immediate + if(list == nullptr || IsEvaluableNodeTypeImmediate(list->GetType())) + woven_list->AppendOrderedChildNode(list); + else if(list->GetOrderedChildNodes().size() > list_index) //only write out if list is long enough + woven_list->AppendOrderedChildNode(list->GetOrderedChildNodes()[list_index]); + } + } + + EvaluableNodeManager::UpdateFlagsForNodeTree(woven_list); + return woven_list; + } + + //for every index, iterate over every list and call the function + for(size_t list_index = 0; list_index < maximum_list_size; list_index++) + { + //get all of the values + EvaluableNode *list_index_values_node = evaluableNodeManager->AllocNode(ENT_LIST); + 
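+	//this per-position slice will hold the current element from every input (null where an
+	//input is too short); the function then runs with the slice as its current value and
+	//whatever list it returns is spliced into the woven output, so a function that returns
+	//its slice unchanged roughly reproduces the default interleave, e.g. (1 2) and (3 4)
+	//weave into (1 3 2 4)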
list_index_values_node->ReserveOrderedChildNodes(lists.size()); + for(auto &list : lists) + { + //if immediate, then write out immediate + if(list == nullptr || IsEvaluableNodeTypeImmediate(list->GetType())) + list_index_values_node->AppendOrderedChildNode(list); + else if(list->GetOrderedChildNodes().size() > list_index) + list_index_values_node->AppendOrderedChildNode(list->GetOrderedChildNodes()[list_index]); + else //there's no value, so append null so that at least the function can see it + list_index_values_node->AppendOrderedChildNode(nullptr); + } + + PushNewConstructionContext(nullptr, woven_list, EvaluableNodeImmediateValueWithType(static_cast(list_index)), list_index_values_node); + + EvaluableNodeReference values_to_weave = InterpretNode(function); + + PopConstructionContext(); + + if(values_to_weave == nullptr) + { + woven_list->AppendOrderedChildNode(nullptr); + continue; + } + + //append as if it were a list + for(EvaluableNode *cn : values_to_weave->GetOrderedChildNodes()) + woven_list->AppendOrderedChildNode(cn); + if(values_to_weave->GetOrderedChildNodes().size() > 0) + woven_list.UpdatePropertiesBasedOnAttachedNode(values_to_weave); + + //the rest of the values have been copied over, so only the top node is potentially freeable + evaluableNodeManager->FreeNodeIfPossible(values_to_weave); + } + + return woven_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_REDUCE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto function = InterpretNodeForImmediateUse(ocn[0]); + if(function == nullptr) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + //get list + auto list = InterpretNode(ocn[1]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + EvaluableNodeReference cur_value = EvaluableNodeReference::Null(); + + if(list->IsAssociativeArray()) + { + bool first_node = (cur_value == nullptr); + //iterate over list + for(auto &[n_id, n] : list->GetMappedChildNodesReference()) + { + //grab a value if first one + if(first_node) + { + cur_value = EvaluableNodeReference(n, false); //can't make any guarantees because used in a function + first_node = false; + continue; + } + + //pass values to be mapped + PushNewConstructionContext(nullptr, list, EvaluableNodeImmediateValueWithType(), cur_value); + PushNewConstructionContext(nullptr, list, EvaluableNodeImmediateValueWithType(n_id), n); + + EvaluableNodeReference new_value = InterpretNode(function); + + PopConstructionContext(); + PopConstructionContext(); + + //move the current value over + cur_value = new_value; + } + } + else if(list->GetOrderedChildNodes().size() >= 1) + { + auto &list_ocn = list->GetOrderedChildNodes(); + cur_value = EvaluableNodeReference(list_ocn[0], false); //can't make any guarantees because used in a function + + //iterate over list + for(size_t i = 1; i < list_ocn.size(); i++) + { + //pass values to be mapped + PushNewConstructionContext(nullptr, list, EvaluableNodeImmediateValueWithType(), cur_value); + PushNewConstructionContext(nullptr, list, EvaluableNodeImmediateValueWithType(static_cast(i)), list_ocn[i]); + + EvaluableNodeReference new_value = InterpretNode(function); + + PopConstructionContext(); + PopConstructionContext(); + + //move the current value over + cur_value = new_value; + } + } + + return cur_value; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_APPLY(EvaluableNode *en) +{ + auto &ocn = 
en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get the target + auto source = InterpretNode(ocn[1]); + if(source == nullptr) + return EvaluableNodeReference::Null(); + + if(!source.unique) + source.reference = evaluableNodeManager->AllocNode(source); + + auto node_stack = CreateInterpreterNodeStackStateSaver(source); + + //get the type to set + EvaluableNodeType new_type = ENT_NULL; + auto type_node = InterpretNodeForImmediateUse(ocn[0]); + if(type_node != nullptr) + { + if(type_node->GetType() == ENT_STRING) + { + std::string new_type_string = EvaluableNode::ToString(type_node); + new_type = GetEvaluableNodeTypeFromString(new_type_string, true); + evaluableNodeManager->FreeNodeTreeIfPossible(type_node); + } + else + { + new_type = type_node->GetType(); + + //see if need to prepend anything to the source before changing type + if(type_node->GetOrderedChildNodes().size() == 0) + evaluableNodeManager->FreeNodeTreeIfPossible(type_node); + else //prepend the parameters of source + { + source->GetOrderedChildNodes().insert(begin(source->GetOrderedChildNodes()), begin(type_node->GetOrderedChildNodes()), end(type_node->GetOrderedChildNodes())); + source.UpdatePropertiesBasedOnAttachedNode(type_node); + } + } + } + + source->SetType(new_type, evaluableNodeManager); + + //apply the new type, using whether or not it was a unique reference + EvaluableNodeReference result = InterpretNode(source); + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_REVERSE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //get the list to reverse + auto list = InterpretNode(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + //make sure it is an editable copy + if(!list.unique) + list.reference = evaluableNodeManager->AllocNode(list); + + auto &list_ocn = list->GetOrderedChildNodes(); + std::reverse(begin(list_ocn), end(list_ocn)); + + return list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_SORT(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + if(ocn.size() == 1) + { + //get list + auto list = InterpretNode(ocn[0]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + //make sure it is an editable copy + if(!list.unique) + list.reference = evaluableNodeManager->AllocNode(list); + + std::sort(begin(list->GetOrderedChildNodes()), end(list->GetOrderedChildNodes()), EvaluableNode::IsStrictlyLessThan); + + return list; + } + else + { + //get function to apply to list + auto function = InterpretNodeForImmediateUse(ocn[0]); + if(function == nullptr) + return EvaluableNodeReference::Null(); + + auto node_stack = CreateInterpreterNodeStackStateSaver(function); + + //get list + auto list = InterpretNode(ocn[1]); + if(list == nullptr) + return EvaluableNodeReference::Null(); + + //make sure it is an editable copy + if(!list.unique) + list.reference = evaluableNodeManager->AllocNode(list); + + CustomEvaluableNodeComparator comparator(this, function, list); + + //sort list; can't use the C++ sort function because it requires weak ordering and will crash otherwise + // the custom comparator does not guarantee this + std::vector sorted = CustomEvaluableNodeOrderedChildNodesSort(list->GetOrderedChildNodes(), comparator); + list->SetOrderedChildNodes(sorted); + + return list; + } +} + +EvaluableNodeReference 
Interpreter::InterpretNode_ENT_INDICES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + //get assoc array to look up + auto container = InterpretNodeForImmediateUse(ocn[0]); + + if(container == nullptr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + + EvaluableNodeReference index_list; + + if(container->IsAssociativeArray()) + { + auto &container_mcn = container->GetMappedChildNodesReference(); + index_list.reference = evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_STRING, container_mcn.size()); + + //create all the string references at once for speed (especially multithreading) + string_intern_pool.CreateStringReferences(container_mcn, [](auto n) { return n.first; }); + + auto &index_list_ocn = index_list->GetOrderedChildNodes(); + size_t index = 0; + for(auto &[node_id, _] : container_mcn) + index_list_ocn[index++]->SetStringIDWithReferenceHandoff(node_id); + } + else if(container->IsOrderedArray()) + { + size_t num_ordered_nodes = container->GetOrderedChildNodesReference().size(); + index_list.reference = evaluableNodeManager->AllocListNodeWithOrderedChildNodes(ENT_NUMBER, num_ordered_nodes); + + auto &index_list_ocn = index_list->GetOrderedChildNodes(); + for(size_t i = 0; i < num_ordered_nodes; i++) + index_list_ocn[i]->SetNumberValue(static_cast(i)); + } + else //no child nodes, just alloc an empty list + index_list.reference = evaluableNodeManager->AllocNode(ENT_LIST); + + //none of the original container is needed + evaluableNodeManager->FreeNodeTreeIfPossible(container); + + return index_list; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_VALUES(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 1) + return EvaluableNodeReference::Null(); + + bool only_unique_values = false; + if(ocn.size() >= 2) + only_unique_values = InterpretNodeIntoBoolValue(ocn[1]); + + //get assoc array to look up + auto container = InterpretNode(ocn[0]); + + //make new list containing the values + EvaluableNode *result = evaluableNodeManager->AllocNode(ENT_LIST); + + if(container == nullptr) + return EvaluableNodeReference(result, true); + + if(!only_unique_values) + { + result->ReserveOrderedChildNodes(container->GetNumChildNodes()); + if(container->IsOrderedArray()) + { + auto &container_ocn = container->GetOrderedChildNodesReference(); + result->AppendOrderedChildNodes(container_ocn); + } + else if(container->IsAssociativeArray()) + { + for(auto &[_, cn] : container->GetMappedChildNodesReference()) + result->AppendOrderedChildNode(cn); + } + } + else //only_unique_values + { + //if noncyclic data, simple container, and sufficiently few nodes for an n^2 comparison + // just do the lower overhead check with more comparisons + if(!container->GetNeedCycleCheck() && !container->IsAssociativeArray() && container->GetNumChildNodes() < 10) + { + auto &container_ocn = container->GetOrderedChildNodes(); + for(size_t i = 0; i < container_ocn.size(); i++) + { + //check everything prior + bool value_exists = false; + for(size_t j = 0; j < i; j++) + { + if(EvaluableNode::AreDeepEqual(container_ocn[i], container_ocn[j])) + { + value_exists = true; + break; + } + } + + if(!value_exists) + result->AppendOrderedChildNode(container_ocn[i]); + } + } + else //use a hash-set and look up stringified values for collisions + { + //attempt to emplace/insert the unparsed node into values_in_existance, and if successful, append the value + FastHashSet 
values_in_existance; + + if(container->IsOrderedArray()) + { + for(auto &n : container->GetOrderedChildNodesReference()) + { + std::string str_value = Parser::Unparse(n, evaluableNodeManager, false, false, true); + if(values_in_existance.emplace(str_value).second) + result->AppendOrderedChildNode(n); + } + } + else if(container->IsAssociativeArray()) + { + for(auto &[_, cn] : container->GetMappedChildNodesReference()) + { + std::string str_value = Parser::Unparse(cn, evaluableNodeManager, false, false, true); + if(values_in_existance.emplace(str_value).second) + result->AppendOrderedChildNode(cn); + } + } + + } + } + + //the container itself isn't needed + evaluableNodeManager->FreeNodeIfPossible(container); + + return EvaluableNodeReference(result, container.unique); +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINS_INDEX(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get assoc array to look up + auto container = InterpretNodeForImmediateUse(ocn[0]); + if(container == nullptr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(container); + + //get index to look up (will attempt to reuse this node below) + auto index = InterpretNodeForImmediateUse(ocn[1]); + + EvaluableNode **target = TraverseToDestinationFromTraversalPathList(&container.reference, index, false); + EvaluableNodeType result = (target != nullptr ? ENT_TRUE : ENT_FALSE); + + evaluableNodeManager->FreeNodeTreeIfPossible(container); + + //see if can reuse index node + EvaluableNodeReference retval; + if(index != nullptr && index.unique) + { + if(!index->GetNeedCycleCheck()) + evaluableNodeManager->FreeNodeChildNodes(index); + + index->ClearAndSetType(result); + retval = EvaluableNodeReference(index.reference, true); + } + else //need a new node + { + retval = EvaluableNodeReference(evaluableNodeManager->AllocNode(result), true); + } + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_CONTAINS_VALUE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get assoc array to look up + auto collection = InterpretNodeForImmediateUse(ocn[0]); + + if(collection == nullptr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_FALSE), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(collection); + + //get value to look up (will attempt to reuse this node below) + auto value = InterpretNodeForImmediateUse(ocn[1]); + + EvaluableNodeType result = ENT_FALSE; + + //try to find value + if(collection->IsAssociativeArray()) + { + for(auto &[_, cn] : collection->GetMappedChildNodesReference()) + { + if(EvaluableNode::AreDeepEqual(cn, value)) + { + result = ENT_TRUE; + break; + } + } + } + else if(collection->IsOrderedArray()) + { + for(auto &cn : collection->GetOrderedChildNodesReference()) + { + if(EvaluableNode::AreDeepEqual(cn, value)) + { + result = ENT_TRUE; + break; + } + } + } + else if(collection->GetType() == ENT_STRING) + { + //compute regular expression + const std::string &s = collection->GetStringValue(); + + std::string value_as_str = EvaluableNode::ToString(value); + + //use nosubs to prevent unnecessary memory allocations since this is just matching + std::regex rx; + bool valid_rx = true; + try { + rx.assign(value_as_str, std::regex::ECMAScript | std::regex::nosubs); + } + catch(...) 
+ { + valid_rx = false; + } + + if(valid_rx && std::regex_match(s, rx)) + result = ENT_TRUE; + } + + evaluableNodeManager->FreeNodeTreeIfPossible(collection); + + //see if can reuse value node + EvaluableNodeReference retval; + if(value != nullptr && value.unique) + { + if(!value->GetNeedCycleCheck()) + evaluableNodeManager->FreeNodeChildNodes(value); + + value->ClearAndSetType(result); + retval = EvaluableNodeReference(value.reference, true); + } + else //need a new node + { + retval = EvaluableNodeReference(evaluableNodeManager->AllocNode(result), true); + } + + return retval; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_REMOVE(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get assoc array to look up + auto container = InterpretNode(ocn[0]); + if(container == nullptr) + return EvaluableNodeReference::Null(); + //make sure it's editable + if(!container.unique) + container.reference = evaluableNodeManager->AllocNode(container); + + auto node_stack = CreateInterpreterNodeStackStateSaver(container); + + //get indices (or index) to remove + auto indices = InterpretNodeForImmediateUse(ocn[1]); + if(indices == nullptr) //if not found, just return container unmodified + return container; + + //used for deleting nodes if possible -- unique and cycle free + EvaluableNodeReference removed_node = EvaluableNodeReference(nullptr, container.unique && !container->GetNeedCycleCheck()); + + //if not a list, then just remove individual element + auto &indices_ocn = indices->GetOrderedChildNodes(); + if(indices_ocn.size() == 0) + { + if(container->IsAssociativeArray()) + { + StringInternPool::StringID key_sid = EvaluableNode::ToStringIDIfExists(indices); + removed_node.reference = container->EraseMappedChildNode(key_sid); + } + else if(container->IsOrderedArray()) + { + double relative_pos = EvaluableNode::ToNumber(indices); + auto &container_ocn = container->GetOrderedChildNodesReference(); + + //get relative position + size_t actual_pos = 0; + if(relative_pos >= 0) + actual_pos = static_cast(relative_pos); + else + actual_pos = static_cast(container_ocn.size() + relative_pos); + + //if the position is valid, erase it + if(actual_pos >= 0 && actual_pos < container_ocn.size()) + { + removed_node.reference = container_ocn[actual_pos]; + container_ocn.erase(begin(container_ocn) + actual_pos); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(removed_node); + } + else //remove all of the child nodes of the index + { + if(container->IsAssociativeArray()) + { + for(auto &cn : indices_ocn) + { + StringInternPool::StringID key_sid = EvaluableNode::ToStringIDIfExists(cn); + removed_node.reference = container->EraseMappedChildNode(key_sid); + evaluableNodeManager->FreeNodeTreeIfPossible(removed_node); + } + } + else if(container->IsOrderedArray()) + { + auto &container_ocn = container->GetOrderedChildNodesReference(); + + //get valid indices to erase + std::vector indices_to_erase; + indices_to_erase.reserve(indices_ocn.size()); + for(auto &cn : indices_ocn) + { + double relative_pos = EvaluableNode::ToNumber(cn); + + //get relative position + size_t actual_pos = 0; + if(relative_pos >= 0) + actual_pos = static_cast(relative_pos); + else + actual_pos = static_cast(container_ocn.size() + relative_pos); + + //if the position is valid, mark it to be erased + if(actual_pos >= 0 && actual_pos < container_ocn.size()) + indices_to_erase.push_back(actual_pos); + } + + //sort reversed so the indices can be removed 
consistently and efficiently + std::sort(begin(indices_to_erase), end(indices_to_erase), std::greater<>()); + + //remove indices in reverse order and free if possible + for(size_t index : indices_to_erase) + { + //if there were any duplicate indices, skip them + if(index >= container_ocn.size()) + continue; + + removed_node.reference = container_ocn[index]; + container_ocn.erase(begin(container_ocn) + index); + evaluableNodeManager->FreeNodeTreeIfPossible(removed_node); + } + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(indices); + + return container; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_KEEP(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + //get assoc array to look up + auto container = InterpretNode(ocn[0]); + if(container == nullptr) + return EvaluableNodeReference::Null(); + //make sure it's editable + if(!container.unique) + container.reference = evaluableNodeManager->AllocNode(container); + + auto node_stack = CreateInterpreterNodeStackStateSaver(container); + + //get indices (or index) to keep + auto indices = InterpretNodeForImmediateUse(ocn[1]); + if(indices == nullptr) //if not found, just return container unmodified + return container; + + //if not a list, then just keep the individual element + auto &indices_ocn = indices->GetOrderedChildNodes(); + if(indices_ocn.size() == 0) + { + if(container->IsAssociativeArray()) + { + StringInternPool::StringID key_sid = EvaluableNode::ToStringIDWithReference(indices); + auto &container_mcn = container->GetMappedChildNodesReference(); + + //find what should be kept, or clear key_sid if not found + EvaluableNode *to_keep = nullptr; + auto found_to_keep = container_mcn.find(key_sid); + if(found_to_keep != end(container_mcn)) + to_keep = found_to_keep->second; + else + { + string_intern_pool.DestroyStringReference(key_sid); + key_sid = string_intern_pool.NOT_A_STRING_ID; + } + + //free everything not kept if possible + if(container.unique && !container->GetNeedCycleCheck()) + { + for(auto &[cn_id, cn] : container_mcn) + { + if(cn_id != key_sid) + evaluableNodeManager->FreeNodeTree(cn); + } + } + + //put to_keep back in (have the string reference from above) + container->ClearMappedChildNodes(); + if(key_sid != string_intern_pool.NOT_A_STRING_ID) + container_mcn.insert(std::make_pair(key_sid, to_keep)); + } + else if(container->IsOrderedArray()) + { + double relative_pos = EvaluableNode::ToNumber(indices); + auto &container_ocn = container->GetOrderedChildNodesReference(); + + //get relative position + size_t actual_pos = 0; + if(relative_pos >= 0) + actual_pos = static_cast(relative_pos); + else + actual_pos = static_cast(container_ocn.size() + relative_pos); + + //if the position is valid, erase everything but that position + if(actual_pos >= 0 && actual_pos < container_ocn.size()) + { + + //free everything not kept if possible + if(container.unique && !container->GetNeedCycleCheck()) + { + for(size_t i = 0; i < container_ocn.size(); i++) + { + if(i != actual_pos) + evaluableNodeManager->FreeNodeTree(container_ocn[i]); + } + } + + EvaluableNode *to_keep = container_ocn[actual_pos]; + container_ocn.clear(); + container_ocn.push_back(to_keep); + } + } + } + else //keep all of the child nodes of the index + { + if(container->IsAssociativeArray()) + { + auto &container_mcn = container->GetMappedChildNodesReference(); + EvaluableNode::AssocType new_container; + + for(auto &cn : indices_ocn) + { + StringInternPool::StringID key_sid = 
EvaluableNode::ToStringIDIfExists(cn); + + //if found, move it over to the new container + auto found_to_keep = container_mcn.find(key_sid); + if(found_to_keep != end(container_mcn)) + { + new_container.insert(std::make_pair(found_to_keep->first, found_to_keep->second)); + container_mcn.erase(found_to_keep); + } + } + + //anything left should be freed if possible + if(container.unique && !container->GetNeedCycleCheck()) + { + for(auto &[_, cn] : container_mcn) + evaluableNodeManager->FreeNodeTree(cn); + } + string_intern_pool.DestroyStringReferences(container_mcn, [](auto &pair) { return pair.first; }); + + //put in place + std::swap(container_mcn, new_container); + } + else if(container->IsOrderedArray()) + { + auto &container_ocn = container->GetOrderedChildNodesReference(); + + //get valid indices to keep + std::vector indices_to_keep; + indices_to_keep.reserve(indices_ocn.size()); + for(auto &cn : indices_ocn) + { + double relative_pos = EvaluableNode::ToNumber(cn); + + //get relative position + size_t actual_pos = 0; + if(relative_pos >= 0) + actual_pos = static_cast(relative_pos); + else + actual_pos = static_cast(container_ocn.size() + relative_pos); + + //if the position is valid, mark it to be erased + if(actual_pos >= 0 && actual_pos < container_ocn.size()) + indices_to_keep.push_back(actual_pos); + } + + //sort to keep in order and remove duplicates + std::sort(begin(indices_to_keep), end(indices_to_keep)); + + std::vector new_container; + new_container.reserve(indices_to_keep.size()); + + //move indices over, but keep track of the previous one to skip duplicates + size_t prev_index = std::numeric_limits::max(); + for(size_t i = 0; i < indices_to_keep.size(); i++) + { + size_t index = indices_to_keep[i]; + + if(index == prev_index) + continue; + + new_container.push_back(container_ocn[index]); + + //set to null so it won't be cleared later + container_ocn[index] = nullptr; + + prev_index = index; + } + + //free anything left in original container + if(container.unique && !container->GetNeedCycleCheck()) + { + for(auto cn : container_ocn) + evaluableNodeManager->FreeNodeTree(cn); + } + + //put in place + std::swap(container_ocn, new_container); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(indices); + + return container; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ASSOCIATE(EvaluableNode *en) +{ + //use stack to lock it in place, but copy it back to temporary before returning + EvaluableNodeReference new_assoc(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + + auto &ocn = en->GetOrderedChildNodes(); + size_t num_nodes = ocn.size(); + + if(num_nodes > 0) + { + new_assoc->ReserveMappedChildNodes(num_nodes / 2); + + #ifdef MULTITHREAD_SUPPORT + if(en->GetConcurrency() && num_nodes > 1) + { + auto enqueue_task_lock = Concurrency::threadPool.BeginEnqueueBatchTask(); + if(enqueue_task_lock.AreThreadsAvailable()) + { + auto node_stack = CreateInterpreterNodeStackStateSaver(new_assoc); + + //get keys + std::vector keys; + keys.reserve(num_nodes / 2); + + for(size_t i = 0; i + 1 < num_nodes; i += 2) + keys.push_back(InterpretNodeIntoStringIDValueWithReference(ocn[i])); + + ConcurrencyManager concurrency_manager(this, num_nodes / 2); + + //kick off interpreters + for(size_t node_index = 0; node_index + 1 < num_nodes; node_index += 2) + concurrency_manager.PushTaskToResultFuturesWithConstructionStack(ocn[node_index + 1], en, new_assoc, + EvaluableNodeImmediateValueWithType(keys[node_index / 2]), nullptr); + + enqueue_task_lock.Unlock(); + + 
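+	//the keys were interpreted sequentially above (holding their string references), and each
+	//value expression has now been enqueued as its own task; EndConcurrency below waits for
+	//all of those futures before the computed values are attached to their keys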
concurrency_manager.EndConcurrency(); + + //add results to assoc + auto results = concurrency_manager.GetResultsAndFreeReferences(); + for(size_t i = 0; i < num_nodes / 2; i++) + { + auto key_sid = keys[i]; + auto &value = results[i]; + + //add it to the list + new_assoc->SetMappedChildNodeWithReferenceHandoff(key_sid, value); + new_assoc.UpdatePropertiesBasedOnAttachedNode(value); + } + + return new_assoc; + } + } + #endif + + //construction stack has a reference, so no KeepNodeReference isn't needed for anything referenced + PushNewConstructionContext(en, new_assoc, EvaluableNodeImmediateValueWithType(StringInternPool::NOT_A_STRING_ID), nullptr); + + for(size_t i = 0; i < num_nodes; i += 2) + { + //get key + StringInternPool::StringID key_sid = InterpretNodeIntoStringIDValueWithReference(ocn[i]); + + SetTopTargetValueIndexInConstructionStack(key_sid); + + //compute the value, but make sure have another node + EvaluableNodeReference value; + if(i + 1 < num_nodes) + value = InterpretNode(ocn[i + 1]); + + //handoff the reference from index_value to the assoc + new_assoc->SetMappedChildNodeWithReferenceHandoff(key_sid, value); + new_assoc.UpdatePropertiesBasedOnAttachedNode(value); + } + + PopConstructionContext(); + } + + return new_assoc; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_ZIP(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + size_t num_params = ocn.size(); + if(num_params < 1) + return EvaluableNodeReference::Null(); + + //get the indices of the parameters based on how many there are + size_t index_list_index = 0; + size_t value_list_index = 1; + + auto node_stack = CreateInterpreterNodeStackStateSaver(); + + //if a function is specified, then set up appropriate data structures to call the function and move the indices for the index and value parameters + EvaluableNodeReference function = EvaluableNodeReference::Null(); + if(num_params == 3) + { + index_list_index++; + value_list_index++; + + function = InterpretNodeForImmediateUse(ocn[0]); + node_stack.PushEvaluableNode(function); + } + + //attempt to get indices, the keys of the assoc + auto index_list = InterpretNodeForImmediateUse(ocn[index_list_index]); + if(index_list == nullptr) + { + EvaluableNodeReference result(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + return result; + } + + //attempt to get the value(s) of the assoc + EvaluableNodeReference value_list = EvaluableNodeReference::Null(); + if(ocn.size() > value_list_index) + { + node_stack.PushEvaluableNode(index_list); + value_list = InterpretNode(ocn[value_list_index]); + node_stack.PopEvaluableNode(); + } + + //set up the result + EvaluableNodeReference result(evaluableNodeManager->AllocNode(ENT_ASSOC), true); + //values will be placed in, so it should be updated as if it will contain them all + if(value_list != nullptr) + result.UpdatePropertiesBasedOnAttachedNode(value_list); + + if(function != nullptr) + { + node_stack.PushEvaluableNode(index_list); + node_stack.PushEvaluableNode(value_list); + } + + auto &index_list_ocn = index_list->GetOrderedChildNodes(); + result->ReserveMappedChildNodes(index_list_ocn.size()); + for(size_t i = 0; i < index_list_ocn.size(); i++) + { + //convert index to string + EvaluableNode *index = index_list_ocn[i]; + if(index == nullptr) + continue; + + //create the reference to handoff below + StringInternPool::StringID index_sid = EvaluableNode::ToStringIDWithReference(index); + + //get value + EvaluableNode *value = nullptr; + if(value_list != nullptr) + { + if(i < 
value_list->GetOrderedChildNodes().size()) + value = value_list->GetOrderedChildNodes()[i]; + else //not a list, so just use the value itself + { + value = value_list; + //reusing the value, so can't be cycle free in the result + result->SetNeedCycleCheck(true); + //and the value might no longer be unique and be able to be freed + value_list.unique = false; + } + } + + //if no function, then just put value into the appropriate slot for the index + if(function == nullptr) + { + result->SetMappedChildNodeWithReferenceHandoff(index_sid, value, true); + } + else //has a function, so handle collisions appropriately + { + //try to insert without overwriting + if(!result->SetMappedChildNodeWithReferenceHandoff(index_sid, value, false)) + { + //collision occurred, so call function + EvaluableNode **cur_value_ptr = result->GetOrCreateMappedChildNode(index_sid); + + PushNewConstructionContext(nullptr, result, EvaluableNodeImmediateValueWithType(index_sid), *cur_value_ptr); + PushNewConstructionContext(nullptr, result, EvaluableNodeImmediateValueWithType(index_sid), value); + + EvaluableNodeReference collision_result = InterpretNode(function); + + PopConstructionContext(); + PopConstructionContext(); + + *cur_value_ptr = collision_result; + result.UpdatePropertiesBasedOnAttachedNode(collision_result); + } + } + } + + if(function != nullptr) + { + //the index list has been converted to strings, so therefore can be freed + evaluableNodeManager->FreeNodeTreeIfPossible(index_list); + //the values have likely been copied, so only the top node can be freed + evaluableNodeManager->FreeNodeIfPossible(value_list); + } + + return result; +} + +EvaluableNodeReference Interpreter::InterpretNode_ENT_UNZIP(EvaluableNode *en) +{ + auto &ocn = en->GetOrderedChildNodes(); + + if(ocn.size() < 2) + return EvaluableNodeReference::Null(); + + auto zipped = InterpretNode(ocn[0]); + if(zipped == nullptr) + return EvaluableNodeReference(evaluableNodeManager->AllocNode(ENT_LIST), true); + + auto node_stack = CreateInterpreterNodeStackStateSaver(zipped); + auto index_list = InterpretNodeForImmediateUse(ocn[1]); + node_stack.PopEvaluableNode(); + + EvaluableNodeReference result(evaluableNodeManager->AllocNode(ENT_LIST), true); + + if(index_list == nullptr) + return result; + + auto &index_list_ocn = index_list->GetOrderedChildNodes(); + result.UpdatePropertiesBasedOnAttachedNode(zipped); + + auto &result_ocn = result->GetOrderedChildNodesReference(); + result_ocn.reserve(index_list_ocn.size()); + + if(EvaluableNode::IsAssociativeArray(zipped)) + { + for(auto &index : index_list_ocn) + { + StringInternPool::StringID index_sid = EvaluableNode::ToStringIDIfExists(index); + + EvaluableNode **found = zipped->GetMappedChildNode(index_sid); + if(found != nullptr) + result_ocn.push_back(*found); + else + result_ocn.push_back(nullptr); + } + } + else //ordered list + { + auto &zipped_ocn = zipped->GetOrderedChildNodes(); + for(auto &index : index_list_ocn) + { + double index_value = EvaluableNode::ToNumber(index); + if(index_value < 0) + { + index_value += zipped_ocn.size(); + if(index_value < 0) //clamp at zero + index_value = 0; + } + + if(FastIsNaN(index_value) || index_value >= zipped_ocn.size()) + result_ocn.push_back(nullptr); + else + result_ocn.push_back(zipped_ocn[static_cast(index_value)]); + } + } + + evaluableNodeManager->FreeNodeTreeIfPossible(index_list); + return result; +} diff --git a/src/Amalgam/out.txt b/src/Amalgam/out.txt new file mode 100644 index 00000000..066960f2 --- /dev/null +++ b/src/Amalgam/out.txt @@ -0,0 
+1,4726 @@ +--Amalgam Version-- +43.0.1-alpha+local.dev +--system_time-- +--label-- +hello world: 12 and 2 +hello world: 12 and 2 +--non-keyword token-- +6 +notakeyword +--get_defaults-- +(assoc + != 0.65 + !~ 0.1 + * 0.65 + + 0.9 + - 0.65 + / 0.6 + < 0.85 + <= 0.85 + = 1.2 + > 0.85 + >= 0.85 + abs 0.4 + accum 0.25 + accum_entity_roots 0.01 + accum_to_entities 0.5 + acos 0.2 + acosh 0.07 + and 0.75 + append 0.65 + apply 0.5 + args 0.08 + asin 0.2 + asinh 0.07 + assign 0.95 + assign_entity_roots 0.01 + assign_to_entities 0.5 + assoc 3 + associate 0.8 + atan 0.2 + atanh 0.07 + call 1.5 + call_container 0.5 + call_entity 0.5 + call_entity_get_changes 0.05 + call_sandboxed 0.25 + ceil 0.6 + clone_entities 0.1 + commonality 0.2 + commonality_entities 0.02 + compute_entity_convictions 0.2 + compute_entity_distance_contributions 0.2 + compute_entity_group_kl_divergence 0.2 + compute_entity_kl_divergences 0.2 + compute_on_contained_entities 0.3 + concat 0.2 + conclude 0.05 + contained_entities 0.3 + contains_entity 0.1 + contains_index 0.5 + contains_label 0.5 + contains_value 0.5 + cos 0.2 + cosh 0.07 + create_entities 0.1 + crypto_sign 0.01 + crypto_sign_verify 0.01 + declare 0.5 + decrypt 0.01 + destroy_entities 0.1 + difference 0.2 + difference_entities 0.02 + direct_assign_to_entities 0.01 + direct_retrieve_from_entity 0.01 + dot_product 0.2 + edit_distance 0.2 + edit_distance_entities 0.02 + encrypt 0.01 + erf 0.05 + exp 0.4 + explode 0.02 + false 0.1 + filter 0.5 + first 0.65 + flatten_entity 0.02 + floor 0.6 + format 0.05 + generalized_distance 0.15 + get 3 + get_all_labels 0.05 + get_comments 0.05 + get_concurrency 0.01 + get_defaults 0.01 + get_digits 0.1 + get_entity_comments 0.01 + get_entity_rand_seed 0.01 + get_entity_root_permission 0.01 + get_labels 0.1 + get_rand_seed 0.02 + get_type 0.25 + get_type_string 0.25 + get_value 0.15 + if 1 + indices 0.5 + intersect 0.2 + intersect_entities 0.02 + keep 0.5 + lambda 1.5 + last 0.65 + let 0.95 + lgamma 0.07 + list 2.5 + load 0.01 + load_entity 0.01 + load_persistent_entity 0.01 + log 0.4 + map 1.1 + max 0.4 + min 0.4 + mix 0.2 + mix_entities 0.02 + mix_labels 0.2 + mod 0.2 + move_entities 0.15 + mutate 0.2 + mutate_entity 0.02 + not 0.75 + null 0.75 + number 8 + or 0.75 + parallel 0.5 + parse 0.05 + pow 0.2 + print 0.01 + query_among 0.2 + query_between 0.2 + query_count 0.2 + query_equals 0.2 + query_exists 0.2 + query_generalized_mean 0.2 + query_greater_or_equal_to 0.2 + query_in_entity_list 0.2 + query_less_or_equal_to 0.2 + query_max 0.2 + query_max_difference 0.2 + query_min 0.2 + query_min_difference 0.2 + query_mode 0.2 + query_nearest_generalized_distance 0.2 + query_not_among 0.2 + query_not_between 0.2 + query_not_equals 0.2 + query_not_exists 0.2 + query_not_in_entity_list 0.2 + query_quantile 0.2 + query_sample 0.2 + query_select 0.2 + query_sum 0.2 + query_value_masses 0.2 + query_weighted_sample 0.2 + query_within_generalized_distance 0.2 + rand 0.4 + range 0.5 + reduce 0.7 + remove 0.5 + replace 0.1 + retrieve 0.1 + retrieve_entity_root 0.01 + retrieve_from_entity 0.5 + reverse 0.4 + rewrite 0.1 + round 0.6 + seq 0.5 + set 0.35 + set_comments 0.05 + set_concurrency 0.01 + set_digits 0.1 + set_entity_rand_seed 0.01 + set_entity_root_permission 0.01 + set_labels 0.1 + set_rand_seed 0.02 + set_type 0.35 + set_value 0.15 + sin 0.2 + sinh 0.07 + size 0.6 + sort 0.5 + split 0.2 + sqrt 0.2 + stack 0.05 + store 0.01 + store_entity 0.01 + string 4 + substr 0.2 + symbol 25 + system 0.05 + system_time 0.01 + tail 0.65 + tan 0.2 + 
tanh 0.07 + target 0.1 + target_index 0.1 + target_value 0.1 + tgamma 0.07 + total_entity_size 0.02 + total_size 0.2 + true 0.1 + trunc 0.65 + union 0.2 + union_entities 0.02 + unparse 0.05 + unzip 0.25 + values 0.5 + weave 0.2 + weighted_rand 0.02 + while 0.1 + xor 0.75 + zip 0.35 + zip_labels 0.02 + ~ 0.1 +) +(assoc + change_label 0.04 + change_type 0.28 + deep_copy_elements 0.05 + delete 0.12 + delete_elements 0.04 + insert 0.23 + swap_elements 0.24 +) +--parse and unparse-- +(print "hello") +(list .nan .nan .infinity -.infinity) + +(assoc b 2 a 1 c (list "alpha" "beta" "gamma")) +(assoc + b 2 + a 1 + c (list "alpha" "beta" "gamma") +) + +--if-- +if 1 +if 2 +4 +--seq-- +1 +2 +3 +--lambda and call-- +(declare + (assoc x 6) + (+ x 2) +) +5 +(lambda + (+ 1 2) +) +--call_sandboxed-- +7 +.nan +--while-- +1 +2 +3 +4 +5 +6 +7 +8 +9 +--conclude-- +seq1 success +while1 success +let1 success +declare1 success +142 +--declare-- +7 +7 +--let-- +10 +11 +--assign-- +10 +20 +(list + 0 + 1 + 2 + (assoc a 1 b 2 c 3) +) +(list + 0 + "not 1" + 2 + (assoc a 1 b 2 c 3) +) +(list + 0 + "not 1" + 2 + (assoc + a (list "a attribute") + b 2 + c (list "c attribute") + ) +) +--accum-- +10 +11 +abcdef +(list 1 2 3 4 5 6) + +(list + 1 + 2 + 3 + 4 + 5 + 6 + "7" + 8 +) + +(assoc + a 1 + b 2 + c 3 + d 4 +) + +(assoc + a 1 + b 2 + c 3 + d 4 + e 5 +) + +5 +(list + 0 + 2 + 2 + (assoc a 1 b 2 c 3) +) + +--retrieve-- +1 +1 +(list 1 2) +(assoc raaa 2 rwww 1) +--assign-- +8 +12 +(list 0 1 10 3 4) +(assoc a 3 b 2) +--+-- +10 +----- +-8 +-3 +--*-- +24 +--/-- +0.041666666666666664 +--mod-- +1 +--get_digits-- +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 0 + 0 + 0 +) +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 8 + 9 + 9 +) +(list + 5 + 6 + 7 + 8 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +) +(list 1 1 1) +(list 1 0 0 0 0) +(list 1 2 0) +(list 1 1 1 1) +(list 1 0 0 0 0) +(list 2 0) +(list 0 0 0 0 1 1) +(list 1 0) +(list + 1 + 0 + 0 + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 1 +) +(list 1 2 3 4 5) +(list 5 6 7 8 0) +(list 1 2 3 4 5) +(list 5 6 7 8 0) +(list) +(list) +(list) +(list + 4 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 +) +--set_digits-- +5554567.8 +1234567.555 +4 +1.5 +3 +1.5 +55501234567.8 +2.25 +5555567.8 +1234555.55 +5.555567800000001e+100 +1.2345555499999999e+100 +(list 1 0 1 0) +(list 1 0 1 0) +(list 1 0 1 0) +--floor-- +1 +--ceil-- +2 +--round-- +13 +10 +123.46 +120 +120 +123.46 +123 +0 +1.2 +0 +0.012 +0.01 +0 +0.012345678 +0.6 +0.6 +0.3 +--exp-- +1.6487212707001282 +--log-- +-0.6931471805599453 +4 +--sin-- +0.479425538604203 +--asin-- +0.5235987755982989 +--cos-- +0.8775825618903728 +--acos-- +1.0471975511965979 +--tan-- +0.5463024898437905 +--atan-- +0.4636476090008061 +0.7853981633974483 +--sinh-- +0.5210953054937474 +--asinh-- +0.48121182505960347 +--cosh-- +1.1276259652063807 +--acosh-- +.nan +--tanh-- +0.46211715726000974 +--atanh-- +0.5493061443340549 +--erf-- +0.5204998778130465 +--tgamma-- +1.772453850905516 +--lgamma-- +0.5723649429247001 +--sqrt-- +0.7071067811865476 +--pow-- +0.25 +--abs-- +0.5 +--max-- +9 +8 +(null) +--min-- +-5 +4 +--dot_product-- +6 +6 +6 +--generalized_distance-- + 0 2.0874003024080013e+234 + 1 1 + 2 5 + 3 3 + 4 1.9210176984148622e-48 + 5 5 + 6 3.5355339059327378 + 7 3.5 + 8 3.482050807568877 + 9 3.467687001077147 +10 3.4644599990846436 +11 3.4641374518767565 +12 3.4641016151377544 +13 .infinity +14 .infinity +15 .infinity +16 .infinity +17 2 +18 8 +19 8 +20 0.6666 +21 2.6664 +22 2.6664 +23 1.9997999999999998 +24 92.6574 +25 2.2360679796198744 +26 2.23606797749979 +27 3.0000000031604355 +28 3 +29 .nan +30 5 +31 4 +32 4 +33 
4 +34 2 +35 2.6009928340740736 +--entropy-- +0.6931471805599453 +0.14384103622589045 +0.14384103622589045 +0.14384103622589045 +1.6739764335716716 +0.14384103622589045 +--first-- +4 +2 +1 +0 +a +.nas +--tail-- +(list 9.2 "this") +(list 2 3 4 5 6) +(list 5 6) +(list 3 4 5 6) +(list) +(list 1 2 3 4 5 6) +(list 1 2 3 4 5 6) +(list) +(assoc + a 1 + b 2 + c 3 + d 4 + e 5 +) +(assoc c 3 d 4) +(assoc + a 1 + c 3 + d 4 + e 5 +) +(assoc + a 1 + b 2 + c 3 + d 4 + e 5 + f 6 +) +(assoc) +2 +0 +bcdef +ef +cdef +abcdef + +abcdef + +.nas +--last-- +this +2 +1 +0 +c +.nas +--trunc-- +(list 4 9.2) +(list 1 2 3 4 5) +(list 1 2) +(list 1 2 3 4) +(list) +(list 1 2 3 4 5 6) +(list 1 2 3 4 5 6) +(list) +(assoc + a 1 + b 2 + c 3 + d 4 + e 5 +) +(assoc c 3 d 4) +(assoc + a 1 + c 3 + d 4 + e 5 +) +(assoc + a 1 + b 2 + c 3 + d 4 + e 5 + f 6 +) +(assoc) +2 +0 +abcde +ab +abcd +abcdef + +abcdef + +.nas +--append-- +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +) +(assoc + 0 1 + 1 2 + 2 3 + 3 7 + 4 8 + 5 9 + a 4 + b 5 + c 6 + d 10 + e 11 +) +(list 4 9.2 "this" "end") +(assoc + 0 4 + 1 9.2 + 2 "this" + 3 "end" +) +--size-- +3 +4 +5 +--range-- +(list + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +) +(list + 10 + 9 + 8 + 7 + 6 + 5 + 4 + 3 + 2 + 1 + 0 +) +(list) +(list 0 1 2 3 4 5) +(list 12 12 12 12 12 12) +(list 1 2 3 4 5 6) +(list 1 2 3 4 5 6) +--replace-- +(list + (assoc a 13) +) +(list + (list 4 5 6) + (null) + 1 +) +(list + (list 4 5 6) + (null) + 1 +) +(list + (assoc a 1 b 2) +) +--rewrite-- +(list + (assoc a 14) +) +(list + (associate "a" 312) +) +(list + 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 + 8 +) +(+ 17 a) +--map-- +(list 2 4 6 8) +(list + 10 + 2 + 22 + 5 + 34 + 8 + 46 + 11 +) +(assoc + 10 11 + 20 22 + 30 33 + 40 44 +) +(list 3 4 5 6 7 8) +(list 3 4 5 6 7 .nan) +(assoc + 0 3 + 1 .nan + 2 .nan + 3 .nan + a .nan +) +--filter-- +(list 3 4) +(list 10 1 20) +(assoc 10 1) +(list 10 1 20 30 40 4) +(list + 10 + 1 + 20 + 30 + "" + 40 + 4 +) +(assoc + a 10 + b 1 + c 20 + d "" + e 30 + f 3 + h 4 +) +(assoc + a 10 + b 1 + c 20 + d "" + e 30 + f 3 + h 4 +) +--weave-- +(list 1 2 3) + +(list 1 2 3 4 5 6) + +(list 2 (null) 4 (null) 6 (null)) + +(list "a" 2 @(get (target 0) 0) 4 @(get (target 0) 0) 6) + +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +) + +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +) + +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +) + +(list + 2 + 4 + 6 + 8 + 10 + 12 + 14 + 16 + 18 + 20 + 22 + 24 +) + +(list 1 2 3 4 5 6) + +(list + 1 + 2 + 3 + 5 + 4 + 5 + 6 + 6 + 7 +) + +(list 3 4 5) +--reduce-- +24 +24 +--apply-- +10 +15 +10 +--reverse-- +(list 5 4 3 2 1) +--sort-- +(list 1 3 4 5 9) +(list + 1 + 3.2 + 4 + "b" + "hello" + (list 1 2 3) + "n" + "soy" +) +(list + 1 + "1x" + "10" + 20 + "z2" + "z10" + "z100" +) +(list + "001x" + "010" + 1 + 20 + "z002" + "z010" + "z100" +) +(list 1 3 4 5 9) +(list + 1 + 0 + 6 + 2 + 4 + 3 + 10 + 5 + 7 + 9 + 8 +) +(list + "2020-06-08 lunes 11.32.36" + "2020-06-08 lunes 11.32.37" + "2020-06-08 lunes 11.32.38" + "2020-06-08 lunes 11.32.39" + "2020-06-08 lunes 11.32.46" + "2020-06-08 lunes 11.32.47" + "2020-06-08 lunes 11.32.48" + "2020-06-08 lunes 11.32.49" + "2020-06-08 lunes 11.32.56" + "2020-06-08 lunes 11.32.57" + "2020-06-08 lunes 11.32.58" + "2020-06-08 lunes 11.32.59" + "2020-06-08 lunes 11.33.36" + "2020-06-08 lunes 11.33.37" + "2020-06-08 lunes 11.33.38" + "2020-06-08 lunes 11.33.39" + "2020-06-08 lunes 11.33.40" + "2020-06-08 lunes 11.33.41" + "2020-06-08 lunes 11.33.42" + "2020-06-08 lunes 11.33.43" + "2020-06-08 lunes 11.33.44" + "2020-06-08 lunes 11.33.45" + 
"2020-06-08 lunes 11.33.46" + "2020-06-08 lunes 11.33.47" + "2020-06-08 lunes 11.33.48" +) +--indices-- +(list "4" "b" "a" "c") +(list + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 +) +--values-- +(list "d" 2 1 3) +(list + "a" + 1 + "b" + 2 + "c" + 3 + 4 + "d" +) +(list + "a" + 1 + "b" + 2 + "c" + 3 + 4 + "d" +) +(list "d" 2 1 0 3) +(list + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 +) +--contains_index-- +(true) + +(false) + +(true) + +(false) + +--contains_value-- +(true) + +(false) + +(true) + +(false) + +(true) +(true) +(true) +(true) +(false) +(true) +--remove-- +(assoc a 1 b 2 c 3) +(list + "a" + 1 + "b" + 2 + 3 + 4 + "d" +) +(assoc b 2 c 3) +(list + "a" + 1 + "b" + 2 + 3 + 4 + "d" +) +(list 1 3 4 5) +(list 0 1 2 3 4) +(list 1 2 3 4) +(list) +--keep-- +(assoc 4 "d") +(list "c") +(assoc 4 "d" a 1) +(list "c") +(list 0 2) +(list 5) +(list 0 5) +(list 0 1 2 3 4 5) +--zip-- +(assoc + a 1 + b 2 + c 3 + d 4 +) +(assoc + a (null) + b (null) + c (null) + d (null) +) +(assoc + a 3 + b @(get + (target 0) + "a" + ) + c @(get + (target 0) + "a" + ) + d @(get + (target 0) + "a" + ) +) +(assoc + a 4 + b 2 + c 3 + d 4 +) +(assoc + a 5 + b 2 + c 3 + d 4 +) +(assoc + a 2 + b 1 + c @(get + (target 0) + "b" + ) + d @(get + (target 0) + "b" + ) +) +--unzip-- +(list 1 2) +(list 1 3 2) +--get-- +(list 4 9.2 "this") +9.2 +3 +1 +(list 9.2 "this") + +2: 2 +(null): (null) +(null): (null) +3 +--set-- +(assoc + 4 "d" + a 1 + b 2 + c 3 + e 5 +) +(list 0 1 10 3 4) +(assoc a 3 b 2) +--target-- +(list 1 2 3) +--target_index-- +3 +(null) +(null) +(null) +--target_value-- +(null) +--stack-- +(list + (assoc + abcdefghijklmnop 1 + accum_assoc (assoc + a 1 + b 2 + c 3 + d 4 + e 5 + ) + accum_list (list + 1 + 2 + 3 + 4 + 5 + 6 + "7" + 8 + ) + accum_string "abcdef" + argv (list "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg") + bar (declare + (assoc x 6) + (+ x 2) + ) + foo (declare + (assoc x 6) + (+ x 2) + ) + get_test_assoc (assoc + A (assoc B 2) + B 2 + ) + interpreter "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\out\\build\\amd64-windows-vs\\release\\amalgam-mt.exe" + raaa 2 + rwww 1 + start_time 1693777318.473619 + www 1 + x 12 + zz 10 + ) +) +--args-- +(assoc + abcdefghijklmnop 1 + accum_assoc (assoc + a 1 + b 2 + c 3 + d 4 + e 5 + ) + accum_list (list + 1 + 2 + 3 + 4 + 5 + 6 + "7" + 8 + ) + accum_string "abcdef" + argv (list "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg") + bar (declare + (assoc x 6) + (+ x 2) + ) + foo (declare + (assoc x 6) + (+ x 2) + ) + get_test_assoc (assoc + A (assoc B 2) + B 2 + ) + interpreter "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\out\\build\\amd64-windows-vs\\release\\amalgam-mt.exe" + raaa 2 + rwww 1 + start_time 1693777318.473619 + www 1 + x 12 + zz 10 +) +(assoc bbb 3) +(assoc + abcdefghijklmnop 1 + accum_assoc (assoc + a 1 + b 2 + c 3 + d 4 + e 5 + ) + accum_list (list + 1 + 2 + 3 + 4 + 5 + 6 + "7" + 8 + ) + accum_string "abcdef" + argv (list "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\src\\Amalgam\\./amlg_code/full_test.amlg") + bar (declare + (assoc x 6) + (+ x 2) + ) + foo (declare + (assoc x 6) + (+ x 2) + ) + get_test_assoc (assoc + A (assoc B 2) + B 2 + ) + interpreter "C:\\Users\\Chris Hazard\\Desktop\\DP_repos\\amalgam\\out\\build\\amd64-windows-vs\\release\\amalgam-mt.exe" + raaa 2 + rwww 1 + start_time 1693777318.473619 + www 1 + x 12 + zz 10 +) +--and-- +true +(false) + +--or-- +1 +1 +(false) + +--xor-- +(true) 
+ +(false) + +--not-- +(false) + +(true) + +--=-- +(false) + +(true) + +(true) + +(true) + +(true) + +(false) + +--!=-- +(false) + +(true) + +(false) + +(false) + +(false) + +(false) + +(true) + +--<-- +(true) + +(false) + +(true) + +(false) + +--<=-- +(true) + +(true) + +(true) + +(false) + +(false) + +(false) + +-->-- +(true) + +(false) + +(true) + +(false) + +-->=-- +(true) + +(true) + +(true) + +(false) + +(false) + +(false) + +--~-- +(true) + +(false) + +--!~-- +(false) + +(true) + +--rand-- +0.5392467821223486 +0.9942253561203879 +24.52361533058883 +4 +2 +(list) + +(list 8) + +(list + 9 + 4 + 1 + 10 + 0 + 3 + 5 + 8 + 7 + 6 +) + +(list 25.7161630409425 2.06111894792399 11.116455397472219 15.888330007965623) + +--weighted_rand-- +b +(list "b" "b" "b" "b") + +b +(list "a" "b" @(get (target 0) 0) @(get (target 0) 1)) + +(list "a" @(get (target 0) 0) "b" @(get (target 0) 2)) + +infinity test c or d: (list "d" "d" "d" "c") + +infinity test c or d: (list "c" @(get (target 0) 0) "d" @(get (target 0) 0)) + +(assoc a 30 b 46 c 24) + +(assoc a 29 b 44 c 27) + +(list "2" "4" "1") + +--get_rand_seed-- +ȼ\KOaVT z +--set_rand_seed-- +0.25443726063281735 +0.1603572451200187 +0.25443726063281735 +0.1603572451200187 +--true-- +(true) + +--false-- +(false) + +--null-- +(null) + +(null + (+ 3 5) + 7 +) +--node null-- +#nulltest (null) +--infinity-- +.infinity +-.infinity +--nan-- +.nan +--list-- +(list "a" 1 "b") +--associate-- +(assoc + 4 "d" + a 1 + b 2 + c 3 +) +--assoc-- +(assoc b 2 c 3) +--get_type-- +(+) +--get_type_string-- ++ +string +--set_type-- +(- 3 4) +(list "b" 3 "a" 4) +(list "b" 3 "a" 4) +(assoc a 4 b 3) +8.7 +(parallel + #react + (+ 3 4) +) +--format-- +(list + 97 + 98 + 99 + 100 + 101 + 102 + 103 + -17 + -84 + -105 + -22 + -83 + -106 + -49 + -95 +) +1: A +2: -100 +3: -100 +4: 100 +5: 365779719 +6: 123456789 +7: 14294967296 +8: 65 +9: 8.740783264876139e+294 +10: 6.409830999309918e-10 +11: 54 +12: 32 +13: 414141 +14: AAA +15: TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsu +16: TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsuLg== +17: TWFueSBoYW5kcyBtYWtlIGxpZ2h0IHdvcmsuLi4= +18: Many hands make light work. +19: Many hands make light work.. +19: Many hands make light work... +20: (list + (assoc a 3 b 4) + (assoc c "c") +) +21: [{"b":4,"a":3},{"c":"c","d":null}] +22: [{"a":3,"b":4},{"c":"c","d":null}] +23: b: 2 +a: 1 +e: + - a + - b + - .nan + - .inf +c: 3 +d: 4 + +24: a: 1 +b: 2 +c: 3 +d: 4 +e: + - a + - b + - .nan + - .inf + +25: (assoc a 1) +current date-time in epoch: 2023-09-03-17.41.58.7387200 +2020-06-07 00:22:59 +1391230800 +1391230800 +1391230800 +-6053-05-28 00:24:29 +2020-06-05 00:21:45 +06/05/20 00:21:45 EDT +45 + 5 + s + s +FriJun06/05/20 00:21:4505 5 f +jueves, ene. 01, 1970 +Sunday, Jun 07, 2020 +domingo, jun. 07, 2020 +1591502400 +Jun 07, 2020 +domingo, jun. 
07, 2020 +664428 +-314954772 +1960-01-08 11.33.48 +1960-01-08 11.33.48.0100000 +--get_labels-- +(list "labelB") +--get_all_labels-- +(assoc + label-number-22 #label-number-22 3 + label21 #label21 + (print + "hello world: " + (* + @(get + (target 2) + "label-number-22" + ) + 4 + ) + #label23 " and " + (* 1 2) + ) + label23 @(get + (get + (target 0) + "label21" + ) + 2 + ) +) +(assoc + labelA #labelQ #labelA + (lambda + #labelB (true) + ) + labelB @(get + (get + (target 0) + "labelA" + ) + 0 + ) + labelQ @(get + (target 0) + "labelA" + ) +) +(assoc + labelA #labelQ #labelA + (lambda + #labelB (true) + ) + labelB @(get + (get + (target 0) + "labelA" + ) + 0 + ) + labelQ @(get + (target 0) + "labelA" + ) +) +--set_labels-- +#labelD #labelE (true) +--zip_labels-- +(list + #l1 1 + #l2 2 + #l3 3 +) +--get_comments-- +comment too +--set_comments-- +;new comment +(true) +--get_concurrency-- +(false) + +(true) + +(true) + +--set_concurrency-- +||(print "hello") + +;complex test +#somelabel +||(assoc a "hello" b 4) + +--get_value-- +(true) +--set_value-- +3 +--explode-- +(list + "a" + "b" + "c" + "d" + "e" + "f" + "g" + "ﬗ" + "ꭖ" + "ϡ" +) +(list + "a" + "b" + "c" + "d" + "e" + "f" + "g" + "" + "" + "" + "" + "" + "" + "" + "" +) +(list + "ab" + "cd" + "ef" + "g" + "" + "" + "" + "" +) +(list "abc" "def" "g" "" "ϡ") +(list "abcd" "efg" "" "ϡ") +--split-- +(list "hello world") +(list "hello" "world") +(list "hello" "world" "!") +(list "hello" "world !") +(list "hello " " the world") +(list "abcdefgﬗꭖϡ") +(list "abc" "de" "fgﬗꭖϡ") +(list "abc" "de" "fgﬗꭖϡ") +(list "abc" "de fgﬗꭖϡ") +(list "abc de fgﬗꭖϡ") +--substr-- +hello world +ello world +ello wo +ello world +ello worl +orl +orl +hxlo world +e +he +(list "he") + +(list "he" "wo") + +(list "he" "wo") + +(list + (list "he" "e") +) + +(list + (list "he" "e") + (list "wo" "o") +) + +(list + (list "he" "he" "h" "e") + (list "wo" "wo" "w" "o") +) + +(list + (list "he" "h") + (list "wo" "w") +) + +(list) + +h[e]ll[o] w[o]rld +h[e]ll[o] world +abcdefgﬗꭖϡ +bcdefgﬗꭖϡ +bcdefgﬗ +bcdefgﬗꭖϡ +bcdefgﬗꭖ +gﬗꭖ + +axdefgﬗꭖϡ +--concat-- +hello world +--crypto_sign and crypto_sign_verify-- +valid signature: (true) +--encrypt and decrypt-- +symmetric key encryption +decrypted: hello +public key encryption +decrypted: hello +--print-- +(list + 0 + 1 + 10 + 12 + 100 + 120 + 122 + 1000 + 1000.123 + 10000 + 100000 + 0.1 + 0.01 + 0.001 + 0.0001 + 1e-05 + 1.23456789e-05 + 1.2345e-149 +) +(list + -0 + -1 + -10 + -12 + -100 + -120 + -122 + -1000 + -1000.123 + -10000 + -100000 + -0.1 + -0.01 + -0.001 + -0.0001 + -1e-05 + -1.23456789e-05 + -1.2345e-149 +) +.nan +.infinity +(true) + +(false) + +--total_size-- +10 +--mutate-- +(list + 1 + b + 3 + (list) + (if) + (append) + 7 + 8 + 9 + 3.813073356447116 + 11 + 12 + b + (query_min_difference) + (associate "a" (seq) b b) +) +(list + 1 + 2 + 3 + 4 + (associate "alpha" 5 "beta" 6) + (associate + "nest" + (associate + "count" + (list 7 8 (-)) + ) + "end" + (list (*) (+) 12) + ) +) +--commonality-- +3 +15 +0.25 +0.25 +1 +0.25 +1 +5 +0.48512352272000003 +2 +2 +4 +4 +3.125 +--edit_distance-- +3 +2 +0 +0 +1.02975295456 +3 +3 +1 +--intersect-- +(list + 1 + (- 4 2) + (assoc b 4) +) +(seq 2 1) +(parallel 2 (get_entity_comments) 1) +(list + 1 + 2 + 3 + (assoc b 4) + (if + true + 1 + (parallel + (get_entity_comments) + #label-not-1 1 + ) + ) + (list 5 6) +) +(list + 1 + (associate "b" 4) +) +(replace 4 2 6 1 7) +(list + + ;comment 2 + ;comment 3 + 1 +) +(list 1 2 3) + +--union-- +(seq 2 (get_entity_comments) 1 4 (get_entity_comments)) +(list + 1 + 
(- 4 2) + (assoc a 3 b 4 c 3) +) +(parallel 2 (get_entity_comments) 1 4) +(list + 1 + 2 + 3 + (assoc a 3 b 4 c 3) + (if + true + 1 + (parallel + (get_entity_comments) + #label-not-1 1 + ) + ) + (list 5 6) +) +(list + 1 + (associate "b" 4 "a" 3 "c" 3) +) +(list 3 4 2) +(list 3 2 4 3) +(list + + ;comment 1 + ;comment 2 + ;comment 3 + ;comment 4 + 1 + + ;comment x + 2 + 4 + 3 + 6 + 5 + 8 + 7 + 10 + 9 + 12 + 11 + 14 + 13 +) +(list + (list 1 2 3) +) + +(list + (list 1 2 3) +) + +(parallel + (list 1 2 3) +) + +--difference-- +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (list + a + 2 + c + 4 + d + 6 + q + 8 + e + 10 + f + 12 + g + 14 + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (assoc + a 2 + c 4 + d 6 + e 10 + f 12 + g 14 + q 8 + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (list + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + 6 + 8 + (get + (target_value 1) + 4 + ) + (get + (target_value 1) + 5 + ) + 14 + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (assoc + a 2 + c 4 + d 6 + e 10 + f 12 + g 14 + q 8 + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list "g") + (lambda + (list + (get + (target_value 1) + 0 + ) + 4 + ) + ) + (list) + (lambda + (assoc + a 2 + g (list 1 @(get (get (get (get (target 4) 1) 2) 0) 1)) + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list 3) + (lambda + (list + (get + (target_value 1) + 0 + ) + 4 + ) + ) + (list) + (lambda + (set_type + (list + a + 2 + g + (get + (target_value 1) + 3 + ) + ) + "associate" + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (assoc + 2 (null) + 5 (null) + 6 (null) + a 1 + ) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (assoc 2 (null) 5 (null) 6 (null)) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (assoc 2 (null) 5 (null) 6 (null)) + ) + ) +) +(declare + (assoc _ (null)) + (replace + _ + (list 1) + (lambda + (list + (get + (target_value 1) + 0 + ) + 4 + ) + ) + (list) + (lambda + (list + 7 + (get + (target_value 1) + 1 + ) + ) + ) + ) +) +(list + 7 + (list 1 4) +) +(declare + (assoc _ (null)) + (replace + _ + (list 1) + (lambda + (list + (get + (target_value 1) + 0 + ) + 4 + ) + ) + (list 0) + (lambda + (set_type + (list 7 8) + "+" + ) + ) + ) +) +(list + (+ 7 8) + (list 1 4) +) +(declare + (assoc _ (null)) + (replace + _ + (list 1 0) + (lambda + (list + (get + (target_value 1) + 0 + ) + "x" + ) + ) + (list 1) + (lambda + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + 4 + ) + ) + (list) + (lambda + (list + 7 + (get + (target_value 1) + 1 + ) + ) + ) + ) +) +(list + 7 + (list + (list "a" "x") + 1 + 4 + ) +) +--mix-- +(list + 1 + 3.5 + 5.5 + 7.5 + 9.5 + 11.5 + 13.5 +) +(list + + ;comment 1 + ;comment 2 + ;comment 3 + ;comment 4 + 1 + 3.5 + 5.5 + 8 + 9.5 + 11.5 +) +(list + 1 + 2.5 + (associate "a" 3 "b" 4) + (lambda + (if + true + 1 + (parallel (get_entity_comments) 1) + ) + ) + (list 5 6) +) +(list + 1 + 5 + 2.5 + (associate "b" 4) + (lambda + (if + true + 1 + (seq (get_entity_comments) 1) + ) + ) + (list 6) +) +(list + (true) + 3.5 + 5.5 + 7.5 + 9.5 + 11.5 + 13.5 +) +(list + 2 + 3 + 5 + 8 + 10 + 14 + 13 +) +4 +4 +2.5 +2.5 +abcdemxyz +abcomxyz +abcdeomxyz +--mix_labels-- +(list + 1 + #mixtest1 2 + #mixtest2 + (associate "a" 3 "b" 4) + (lambda + (if + #mixtest3 true + 1 + (parallel + (get_entity_comments) + #mixtest4 1 + ) + ) + ) + (list 5 6) +) 
+--total_entity_size-- +37 +45 +--flatten_entity-- +0.4400722390196604 +0d뫐 aSgl +0.14275602939442433 +0.14275602939442433 +0.17137441590511793 +0.17137441590511793 +flatten restore with seeds test +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "0d뫐 aSgl") + (set_entity_rand_seed + (first + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + "b(p^fXE7" + ) + new_entity +) +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "0d뫐 aSgl") + (set_entity_rand_seed + (first + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + "b(p^fXE7" + ) + new_entity +) +(declare + (assoc _ (null)) + (clone_entities _) +) +flatten restore without seeds test +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + new_entity +) +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "A%EnDiD") + (set_entity_rand_seed + (first + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + "dAWV\t\rvA" + ) + new_entity +) +(declare + (assoc _ (null)) + (clone_entities _) +) +flatten restore with parallel +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "0d뫐 aSgl") + ||(parallel + (set_entity_rand_seed + (first + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + "b(p^fXE7" + ) + ) + ||(parallel) + new_entity +) +(let + (assoc + new_entity (first + (create_entities + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "0d뫐 aSgl") + (set_entity_rand_seed + (first + (create_entities + (append + new_entity + (list "DeepRand") + ) + (lambda + (parallel + ##a (rand) + ) + ) + ) + ) + "b(p^fXE7" + ) + new_entity +) +(declare + (assoc _ (null)) + (clone_entities _) +) +--mutate_entity-- +(list + 1 + 1 + 3 + 4 + a + NKofOe + 7 + 8 + 9 + (sqrt) + 11 + 12 + 13 + 14 + (associate "a" (contains_value) "b" 2) +) + +a +(list + 1 + (-) + (+) + 4 + (-) + 6 + 7 + (-) + 9 + 10 + 11 + 12 + (-) + 14 + (associate (-) 1 (+) 2) +) + +--commonality_entities-- +23.367956500877984 +--edit_distance_entities-- +35.26408699824403 +--intersect_entities-- +(associate "b" 4) +MergeEntityChild2 +(associate "p" 3 "q" 4) +MergeEntityChild1 +(associate "x" 3 "y" 4) +_2430269628 +(associate "e" 3 "f" 4) +_3605899036 +(associate "E" 3 "F" 4) +--union_entities-- +(associate "b" 4 "a" 3 "c" 3) +MergeEntityChild2 +(associate + "p" + 3 + "q" + 4 + "u" + 5 + "v" + 6 + "w" + 7 +) +MergeEntityChild1 +(associate "x" 3 "y" 4 "z" 5) +_2430269628 +(associate + "e" + 3 + "f" + 4 + "g" + 5 + "h" + 6 +) +_3605899036 +(associate + "E" + 3 + "F" + 4 + "G" + 5 + "H" + 6 +) +(parallel + ##p + (list "_20985349" "_111317518" "_20985349" "_111317518") +) +_20985349 +(associate + "E" + 3 + "F" + 4 + "G" + 5 + "H" + 6 +) +_111317518 +(associate + "e" + 3 + "f" + 4 + "g" + 5 + "h" + 6 +) +--difference_entities-- +(declare + (assoc _ (null)) + (clone_entities _) +) 
+(declare + (assoc _ (null)) + (let + (assoc + new_entity (first + (create_entities + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + "c" + 3 + (get + (target_value + 1 + ) + 2 + ) + (get + (target_value + 1 + ) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root _) + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "MergeEntityChild1") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + "z" + 5 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "MergeEntityChild1") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "MergeEntityChild2") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + "u" + 5 + "v" + 6 + "w" + 7 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "MergeEntityChild2") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_1984154169") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + "G" + 5 + "H" + 6 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_1984154169") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_2509820770") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_2509820770") + ) + ) + ) + ) + ) + new_entity + ) +) +_1472511973 +(list "DiffEntityChild1" "OnlyIn2" "_2861876625" "_3906509543") +(declare + (assoc _ (null)) + (let + (assoc + new_entity (first + (create_entities + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + "c" + 3 + (get + (target_value + 1 + ) + 2 + ) + (get + (target_value + 1 + ) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root _) + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "DiffEntityChild1") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + (get + (target_value 1) + 4 + ) + 5 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "DiffEntityChild1") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "OnlyIn2") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda) + (associate "o" 6) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "OnlyIn2") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_2861876625") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ 
+ (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + "G" + 5 + "H" + 6 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_2861876625") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_3906509543") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_3906509543") + ) + ) + ) + ) + ) + (clone_entities + (append + _ + (list "DiffEntityChild1" "DiffEntityChild2") + ) + (append + new_entity + (list "DiffEntityChild1" "DiffEntityChild2") + ) + ) + new_entity + ) +) +_4022098522 +(associate "c" 3 "b" 4) +(associate "x" 3 "y" 4 "z" 5) +(list "DiffEntityChild1" "OnlyIn2" "_2861876625" "_3906509543") +(declare + (assoc _ (null)) + (let + (assoc + new_entity (first + (create_entities + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + "c" + 3 + (get + (target_value + 1 + ) + 2 + ) + (get + (target_value + 1 + ) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root _) + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "OnlyIn2") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda) + (associate "o" 6) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "OnlyIn2") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_3778675809") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + "G" + 5 + "H" + 6 + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_3778675809") + ) + ) + ) + ) + ) + (create_entities + (append + new_entity + (list "_80566068") + ) + (call + (lambda + (declare + (assoc _ (null)) + (replace + _ + (list) + (lambda + (set_type + (list + (get + (target_value 1) + 0 + ) + (get + (target_value 1) + 1 + ) + (get + (target_value 1) + 2 + ) + (get + (target_value 1) + 3 + ) + ) + "associate" + ) + ) + ) + ) + ) + (assoc + _ (retrieve_entity_root + (append + _ + (list "_80566068") + ) + ) + ) + ) + ) + (clone_entities + (append + _ + (list "DiffEntityChild1") + ) + (append + new_entity + (list "DiffEntityChild1") + ) + ) + new_entity + ) +) +_30261739 +(associate "c" 3 "b" 4) +(associate "x" 3 "y" 4 "z" 6) +(list "OnlyIn2" "_3778675809" "_80566068" "DiffEntityChild1") +--mix_entities-- +(associate "b" 4 "a" 3) +MergeEntityChild2 +(associate + "p" + 3 + "q" + 4 + "u" + 5 + "v" + 6 +) +MergeEntityChild1 +(associate "x" 3 "y" 4) +_2430269628 +(associate + "e" + 3 + "f" + 4 + "g" + 5 + "h" + 6 +) +_3605899036 +(associate "E" 3 "F" 4 "H" 6) +--get_entity_comments-- +Full test +This is a suite of unit tests. + This is the second line of the unit test description. 
+this is a fully described entity +(assoc foo "the function foo" get_api "returns the api details" publicvar "some public variable") +(assoc x "the value of x\r\nthe default value of x" y "the value of y") +(assoc + description "this is a fully described entity" + labels (assoc + foo (assoc + description "the function foo" + parameters (assoc x "the value of x\r\nthe default value of x" y "the value of y") + ) + get_api (assoc description "returns the api details" parameters (null)) + publicvar (assoc description "some public variable" parameters (null)) + ) +) +--retrieve_entity_root-- +(list + 1 + 2 + ##three 3 +) +(list + 1 + 2 + #three 3 +) +--assign_entity_roots-- +(list 4 5 6) +--accum_entity_roots-- +(null + #a 1 + #b 2 + #c 3 +) +(null + #c 3 +) +--get_entity_rand_seed-- +0.4592555439678171 ++Yvw +--set_entity_rand_seed-- +0.7000791742912007 +0.7000791742912007 +deep sets +0.4762978676528419 +0.4762978676528419 +--get_entity_root_permission-- +(false) + +--set_entity_root_permission-- +RootTest +1693777319.008049 +(true) + +RootTest +(null) +(false) + +--create_entities-- +(list "MyLibrary") + +--nested create_entities-- +(list "Child1" "Child2") +(list "MultipleTest1") + +(list "MultipleTest2") + +--clone_entities-- +(list "MyNewLibrary") + +--move_entities-- +(list "MyLibrary2") + +--destroy_entities-- +(list + "MergeEntity1" + "MergeEntity2" + "FlattenTest" + "_188112517" + "_2375701353" + "_4247396121" + "_1437856364" + "_2356538064" + "_360979289" + "MutateEntity" + "MutatedEntity" + "MutatedEntity2" + "MutatedEntity3" + "AndedEntities" + "OredEntities" + "ComplexMergeEntity1" + "ComplexMergeEntity2" + "ComplexOredEntities" + "_1472511973" + "DiffEntity1" + "DiffEntity2" + "_4022098522" + "DiffContainer" + "_30261739" + "MixedEntities" + "descriptive_entity" + "SetGetCodeTest" + "AER_test" + "AER_test_2" + "RandTest" + "RootTest" + "MyNewLibrary" + "EntityWithChildren" + "MultipleTest1" + "MultipleTest2" + "MyLibrary2" +) +(list + "MergeEntity1" + "MergeEntity2" + "FlattenTest" + "_188112517" + "_2375701353" + "_4247396121" + "_1437856364" + "_2356538064" + "_360979289" + "MutateEntity" + "MutatedEntity" + "MutatedEntity2" + "MutatedEntity3" + "AndedEntities" + "OredEntities" + "ComplexMergeEntity1" + "ComplexMergeEntity2" + "ComplexOredEntities" + "_1472511973" + "DiffEntity1" + "DiffEntity2" + "_4022098522" + "DiffContainer" + "_30261739" + "MixedEntities" + "descriptive_entity" + "SetGetCodeTest" + "AER_test" + "AER_test_2" + "RandTest" + "RootTest" + "MyNewLibrary" + "EntityWithChildren" + "MultipleTest1" + "MultipleTest2" +) +(list + "MergeEntity1" + "MergeEntity2" + "FlattenTest" + "_188112517" + "_2375701353" + "_4247396121" + "_1437856364" + "_2356538064" + "_360979289" + "MutateEntity" + "MutatedEntity" + "MutatedEntity2" + "MutatedEntity3" + "AndedEntities" + "OredEntities" + "ComplexMergeEntity1" + "ComplexMergeEntity2" + "ComplexOredEntities" + "_1472511973" + "DiffEntity1" + "DiffEntity2" + "_4022098522" + "DiffContainer" + "_30261739" + "MixedEntities" + "descriptive_entity" + "SetGetCodeTest" + "AER_test" + "AER_test_2" + "RandTest" + "RootTest" + "MyNewLibrary" + "EntityWithChildren" +) +--load-- +(assoc + a #a 1 + b #b (true) + c #c 0.1 + d #d 100000000 + hello #hello + (print "hello\n") +) +load from .json: +(list + (assoc a 3 b 4) + (assoc c "c" d (null)) +) +load from .yaml: +(list + (assoc + a 123 + b "ABC" + c 123.45 + d "true" + ) +) +--load_entity-- +load from .amlg: +hello +(let + (assoc + new_entity (first + (create_entities + (lambda + (assoc + a ##a 
1 + b ##b (true) + c ##c 0.1 + d ##d 100000000 + hello ##hello + (print "hello\n") + ) + ) + ) + ) + ) + (set_entity_rand_seed new_entity "rGkgUn : (false) + +>= : (false) + +~ : (true) + +list : (list 1 1 1 2) + +associate : (assoc + a 1 + b 1 + c 1 + d 2 +) + +assoc : (assoc + a 1 + b 1 + c 1 + d 2 +) + +map list : (list 2 4 6 8) + +map assoc : (assoc + a 2 + b 4 + c 6 + d 8 +) + +filter list : (list 3 4) + +filter assoc : (assoc 10 1) + +filter assoc 2 : (assoc 10 1 20 2) + +Expecting 1000: 1000 +--concurrent entity writes-- +concurrent entity writes successful: (true) + +--total execution time-- +1.288715124130249 diff --git a/src/Amalgam/rand/RandomStream.cpp b/src/Amalgam/rand/RandomStream.cpp new file mode 100644 index 00000000..e952fb49 --- /dev/null +++ b/src/Amalgam/rand/RandomStream.cpp @@ -0,0 +1,122 @@ +//project headers: +#include "RandomStream.h" + +#include "murmurhash3/MurmurHash3.h" + +//system headers: +#include +#include + +#define RANDOM_STATE_SIZE (sizeof(int64_t) * 2 + 1) + +RandomStream::RandomStream(const std::string initial_state) +{ + increment = 0; + state = 0; + SetState(initial_state); +} + +std::string RandomStream::GetState() +{ + char s[RANDOM_STATE_SIZE]; + s[0] = static_cast(255 & (state >> 56)); + s[1] = static_cast(255 & (state >> 48)); + s[2] = static_cast(255 & (state >> 40)); + s[3] = static_cast(255 & (state >> 32)); + s[4] = static_cast(255 & (state >> 24)); + s[5] = static_cast(255 & (state >> 16)); + s[6] = static_cast(255 & (state >> 8)); + s[7] = static_cast(255 & (state >> 0)); + + s[ 8] = static_cast(255 & (increment >> 56)); + s[ 9] = static_cast(255 & (increment >> 48)); + s[10] = static_cast(255 & (increment >> 40)); + s[11] = static_cast(255 & (increment >> 32)); + s[12] = static_cast(255 & (increment >> 24)); + s[13] = static_cast(255 & (increment >> 16)); + s[14] = static_cast(255 & (increment >> 8)); + s[15] = static_cast(255 & (increment >> 0)); + + //use an in-band way of indicating whether the seed has been validated + // the worst that will happen is the random number generator will yield two zeros in a row + //so in the 1 in ~4 billion chance that the last part of the state is all 1's, it will yield that + // anomalous set of random numbers + //this class will always ensure that the state has been initialized + s[16] = static_cast(0xFF); + + return std::string(&s[0], RANDOM_STATE_SIZE); +} + +void RandomStream::SetState(const std::string &new_state) +{ + uint8_t s[RANDOM_STATE_SIZE]; + std::memset(&s[0], 0, RANDOM_STATE_SIZE); + std::memcpy(&s[0], new_state.c_str(), std::min(new_state.size(), RANDOM_STATE_SIZE)); + + state = (static_cast(s[0]) << 56) | (static_cast(s[1]) << 48) + | (static_cast(s[2]) << 40) | (static_cast(s[3]) << 32) + | (static_cast(s[4]) << 24) | (static_cast(s[5]) << 16) + | (static_cast(s[6]) << 8) | static_cast(s[7]); + + increment = (static_cast(s[8 + 0]) << 56) | (static_cast(s[8 + 1]) << 48) + | (static_cast(s[8 + 2]) << 40) | (static_cast(s[8 + 3]) << 32) + | (static_cast(s[8 + 4]) << 24) | (static_cast(s[8 + 5]) << 16) + | (static_cast(s[8 + 6]) << 8) | static_cast(s[8 + 7]); + + //if the state hasn't been declared as initialized, burn through exactly two random numbers to + // prevent make sure it is in a good state based on the paper cited in this class + if(s[16] != 0xFF) + BurnIn(); +} + +std::string RandomStream::CreateOtherStreamStateViaString(const std::string &seed_string) +{ + char s[RANDOM_STATE_SIZE]; + std::memset(&s[0], 0, RANDOM_STATE_SIZE); + MurmurHash3_x64_128(seed_string.c_str(), 
static_cast(seed_string.size()), static_cast(state & 0xFFFFFFFF), &s[0]); + + //randomize the hash based on the current random state + *(reinterpret_cast(&s[0])) ^= state; + *(reinterpret_cast(&s[sizeof(uint64_t)])) ^= increment; + + return std::string(&s[0], RANDOM_STATE_SIZE); +} + +RandomStream RandomStream::CreateOtherStreamViaString(const std::string &seed_string) +{ + RandomStream new_stream; + + char s[RANDOM_STATE_SIZE]; + std::memset(&s[0], 0, RANDOM_STATE_SIZE); + MurmurHash3_x64_128(seed_string.c_str(), static_cast(seed_string.size()), static_cast(state & 0xFFFFFFFF), &s[0]); + + //randomize the hash based on the current random state + new_stream.state = ( *(reinterpret_cast(&s[0])) ^ state ); + new_stream.increment = ( *(reinterpret_cast(&s[sizeof(uint64_t)])) ^ increment ); + + new_stream.BurnIn(); + + return new_stream; +} + +RandomStream RandomStream::CreateOtherStreamViaRand() +{ + RandomStream new_stream; + new_stream.state = ((static_cast(RandUInt32()) << 32) | RandUInt32()); + new_stream.increment = ((static_cast(RandUInt32()) << 32) | RandUInt32()); + new_stream.BurnIn(); + + return new_stream; +} + +uint32_t RandomStream::RandUInt32() +{ + //perform PCG random number generation + //based on this: www.pcg-random.org/download.html + uint64_t old_value = state; + state = old_value * 6364136223846793005ULL + (increment | 1); + + uint32_t xor_shifted = static_cast(((old_value >> 18u) ^ old_value) >> 27u); + uint32_t rot = static_cast(old_value >> 59u); + return (xor_shifted >> rot) | (xor_shifted << ((-static_cast(rot)) & 31)); +} diff --git a/src/Amalgam/rand/RandomStream.h b/src/Amalgam/rand/RandomStream.h new file mode 100644 index 00000000..9801b2a1 --- /dev/null +++ b/src/Amalgam/rand/RandomStream.h @@ -0,0 +1,268 @@ +#pragma once + +//project headers: +#include "../FastMath.h" + +//system headers: +#include +#include +#include +#include + +//Implements a stateful stream of random numbers that can be serialized/deserialized easily into a +// very small amount of data, based on: +// ONeill, Melissa E. "PCG: A family of simple fast space-efficient statistically good algorithms +// for random number generation." ACM Transactions on Mathematical Software(2014). 
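+//In brief: each draw advances a 64-bit linear-congruential state and derives the 32-bit output from the
+// pre-advance state via an xorshift followed by a state-dependent rotation; see RandUInt32() in
+// RandomStream.cpp for the exact steps.
+//Example usage sketch (the seed string below is arbitrary/hypothetical):
+//	RandomStream rs("example seed");
+//	double r = rs.Rand();           //uniform double in [0.0, 1.0)
+//	uint32_t u = rs.RandUInt32();   //raw 32-bit draw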
+//More info at https://www.pcg-random.org +class RandomStream +{ +public: + constexpr RandomStream() + : increment(0), state(0) + { } + + RandomStream(const std::string initial_state); + + constexpr RandomStream(const RandomStream &stream) + : increment(stream.increment), state(stream.state) + { } + + //gets the current state of the random stream in string form + std::string GetState(); + + //sets (seeds) the current state of the random stream based on string + void SetState(const std::string &new_state); + + //returns a random seed based on this stream's current state and seed_string parameter + std::string CreateOtherStreamStateViaString(const std::string &seed_string); + + //returns a Randomstream based on this stream's current state and seed_string parameter + RandomStream CreateOtherStreamViaString(const std::string &seed_string); + + //consumes random numbers from the stream to create a new RandomStream + RandomStream CreateOtherStreamViaRand(); + + //returns a value in the range [0.0,1.0) with 32 bits of randomness + inline double Rand() + { + return std::ldexp(RandUInt32(), -32); + } + + //returns a value in the range [0.0,1.0) with full mantissa of randomness + inline double RandFull() + { + uint64_t combined = (static_cast(RandUInt32()) << 32) | static_cast(RandUInt32()); + return std::ldexp(static_cast(combined & ((static_cast(1) << 53) - 1)), -53); + } + + //returns a uint32_t random number + uint32_t RandUInt32(); + + inline size_t RandSize(size_t max_size) + { + if(max_size < std::numeric_limits::max()) + return (RandUInt32() % max_size); + + //else 64-bit + size_t r = ((static_cast(RandUInt32()) << 32) | RandUInt32()); + return r % max_size; + } + + //returns a positive number chosen from the exponential distribution with specified mean + inline double ExponentialRand(double mean) + { + return -std::log(1.0 - RandFull()) * mean; + } + +protected: + + //based on the published literature, burns through the minimum number of random numbers + // to make sure the subsequent stream is good + inline void BurnIn() + { + RandUInt32(); + RandUInt32(); + } + + //current state / seed of the random stream + uint64_t increment; + uint64_t state; +}; + +//class to enable std::priority_queue to be able to clear and reserve buffers, but requires containers that +//support those operations +template, class Compare = std::less > +class FlexiblePriorityQueue : public std::priority_queue +{ +public: + //inherit all constructors + using std::priority_queue::priority_queue; + + __forceinline void Reserve(size_t reserve_size) + { + //this-> is needed for some compilers to give access due to how the STL is implemented + this->c.reserve(reserve_size); + } + + __forceinline void clear() + { + //this-> is needed for some compilers to give access due to how the STL is implemented + this->c.clear(); + } +}; + +//Priority queue that, when receiving values of equal priority, will randomize the order they are stored and popped off the queue +//Requires the type T to have both the < and == operators +//The constructor requires a seed +template +class StochasticTieBreakingPriorityQueue +{ +public: + + typedef std::vector> PriorityQueueContainerType; + + StochasticTieBreakingPriorityQueue() : + priorityQueue(StochasticTieBreakingComparator()) + { } + + //seeds the priority queue + StochasticTieBreakingPriorityQueue(std::string seed) + : priorityQueue(StochasticTieBreakingComparator()), randomStream(seed) + { } + + StochasticTieBreakingPriorityQueue(RandomStream stream) + : 
priorityQueue(StochasticTieBreakingComparator()), randomStream(stream) + { } + + __forceinline void SetSeed(std::string seed) + { + randomStream.SetState(seed); + } + + __forceinline void SetStream(RandomStream stream) + { + randomStream = stream; + } + + //these functions mimic their respective std::priority_queue functions + __forceinline size_t Size() + { + return priorityQueue.size(); + } + + __forceinline void Reserve(size_t reserve_size) + { + //reserve an extra element because pushing a value on the top and popping one off requires having an extra space + priorityQueue.Reserve(reserve_size + 1); + } + + __forceinline void clear() + { + priorityQueue.clear(); + } + + __forceinline const T &Top() const + { + return priorityQueue.top().first; + } + + __forceinline void Push(const T &val) + { + priorityQueue.emplace(val, randomStream.RandUInt32()); + } + + //like Push but keeps only max_size elements + inline void PushAndOnlyKeepSize(const T &val, size_t max_size) + { + //always push if need more + if(priorityQueue.size() < max_size) + { + priorityQueue.emplace(val, randomStream.RandUInt32()); + return; + } + + auto &top = priorityQueue.top(); + if(val < top.first) + { + //better, so exchange it + priorityQueue.pop(); + priorityQueue.emplace(val, randomStream.RandUInt32()); + } + else if(val == top.first) + { + //good enough to consider for top, check random + uint32_t r = randomStream.RandUInt32(); + + //if won the random selection, then push it on the stack + if(r < top.second) + { + priorityQueue.pop(); + priorityQueue.emplace(val, r); + } + } + //otherwise don't need to do anything, val is not better than the worst on the stack + } + + //like PushAndOnlyKeepSize, but keeps the current size of the priority queue + //requires that there is at least one element in the priority queue + //returns the top element after the push and pop has been completed + inline const T &PushAndPop(const T &val) + { + auto &top = priorityQueue.top(); + if(val < top.first) + { + //better, so exchange it + priorityQueue.pop(); + priorityQueue.emplace(val, randomStream.RandUInt32()); + + return priorityQueue.top().first; + } + else if(val == top.first) + { + //good enough to consider for top, check random + uint32_t r = randomStream.RandUInt32(); + + //if won the random selection, then push it on the stack + if(r < top.second) + { + priorityQueue.pop(); + priorityQueue.emplace(val, r); + + //return new top of stack + return priorityQueue.top().first; + } + + //current top of stack won, return current top + } + //otherwise don't need to do anything, val is not better than the worst on the stack + + return top.first; + } + + __forceinline void Pop() + { + priorityQueue.pop(); + } + + __forceinline bool Empty() + { + return priorityQueue.empty(); + } + +protected: + + //used to compare first by the value, second by the random number if equal + class StochasticTieBreakingComparator + { + public: + constexpr bool operator()(const std::pair &a, const std::pair &b) + { + if(a.first == b.first) + return a.second < b.second; + return a.first < b.first; + } + }; + + FlexiblePriorityQueue, PriorityQueueContainerType, StochasticTieBreakingComparator> priorityQueue; + RandomStream randomStream; +}; diff --git a/src/Amalgam/rand/WeightedDiscreteRandomStream.h b/src/Amalgam/rand/WeightedDiscreteRandomStream.h new file mode 100644 index 00000000..c31c37b5 --- /dev/null +++ b/src/Amalgam/rand/WeightedDiscreteRandomStream.h @@ -0,0 +1,254 @@ +#pragma once + +//project headers: +#include "RandomStream.h" + +//system headers: 
+#include +#include +#include + +//default Functor for the default type for ProbabilityAsDoubleFunctor to transform probability values into doubles +class DoubleAsDouble +{ +public: + constexpr double operator()(double value) + { + return value; + } +}; + +//Normalizes the probabilities; if any probabilities are infinity, it will equally uniformally normalize over just the infinite values +template +void NormalizeProbabilities(ContainerType &probabilities) +{ + //find total mass + double total_mass = 0.0; + for(auto &p : probabilities) + total_mass += p; + + //if less than infinity, just normalize + if(total_mass < std::numeric_limits::infinity()) + { + for(auto &p : probabilities) + p /= total_mass; + } + else //if found one infinity, then need to normalize over just the infinities + { + for(auto &p : probabilities) + { + if(p != std::numeric_limits::infinity()) + p = 0.0; + else + p = 1.0; + } + NormalizeProbabilities(probabilities); + } +} + +//Normalizes the probabilities; if any probabilities are infinity, it will equally uniformally normalize over just the infinite values +template +void NormalizeProbabilitiesMap(ContainerType &probabilities_map) +{ + //find total mass + double total_mass = 0.0; + for(auto &[_, p] : probabilities_map) + total_mass += p; + + //if less than infinity, just normalize + if(total_mass < std::numeric_limits::infinity()) + { + for(auto &[_, p] : probabilities_map) + p /= total_mass; + } + else //if found one infinity, then need to normalize over just the infinities + { + for(auto &[_, p] : probabilities_map) + { + if(p != std::numeric_limits::infinity()) + p = 0.0; + else + p = 1.0; + } + NormalizeProbabilitiesMap(probabilities_map); + } +} + +//Will return a random index, weighted by the values in probabilities based on the specified RandomStream +// if normalize is true, then it will normalize the probabilities in place +template +size_t WeightedDiscreteRandomSample(ContainerType &probabilities, RandomStream &rs, bool normalize = false) +{ + if(normalize) + NormalizeProbabilities(probabilities); + + double r = rs.Rand(); + size_t selected_element = 0; + double probability_mass = 0.0; + + for(; selected_element < probabilities.size(); selected_element++) + { + probability_mass += probabilities[selected_element]; + + if(r <= probability_mass) + return selected_element; + } + + //should only make it here when the numerical precision is off (i.e., didn't add up to 1 exactly) + //moved past the end, so return the one prior + return selected_element - 1; +} + +//Will return a random index, weighted by the values in probabilities based on the specified RandomStream +// if normalize is true, then it will normalize the probabilities in place +// requires that probabilities_map be non-empty +template +ValueType WeightedDiscreteRandomSampleMap(ContainerType &probabilities_map, RandomStream &rs, bool normalize = false) +{ + if(normalize) + NormalizeProbabilitiesMap(probabilities_map); + + double r = rs.Rand(); + ValueType selected_element = 0; + double probability_mass = 0.0; + + for(auto &[key, prob] : probabilities_map) + { + selected_element = key; + probability_mass += prob; + + if(r <= probability_mass) + return key; + } + + //should only make it here when the numerical precision is off (i.e., didn't add up to 1 exactly) + //just grab the first available + return selected_element; +} + +//Class for creating a stream of random values (of type ValueType) based on weighted buckets of values specified by ValueType +//Implements the Alias method as described in +// 
Vose, Michael D. (September 1991). "A linear algorithm for generating random numbers with a given distribution" (PDF). IEEE Transactions on Software Engineering. 17 (9): 972975. +template, typename ProbabilityAsDoubleFunctor = DoubleAsDouble> +class WeightedDiscreteRandomStreamTransform +{ +public: + + WeightedDiscreteRandomStreamTransform() + { } + + WeightedDiscreteRandomStreamTransform(const MapType &map, bool normalize = false) + { + Initialize(map, normalize); + } + + //initializes like the constructor + void Initialize(const MapType &map, bool normalize = false) + { + //pull out data from input + std::vector probabilities; + probabilities.reserve(map.size()); + valueTable.reserve(map.size()); + + //iterate over all input and grab values + ProbabilityAsDoubleFunctor transform_to_double; + for(auto &[key, prob] : map) + { + valueTable.push_back(key); + probabilities.push_back(transform_to_double(prob)); + } + + InitializeAliasTable(probabilities, normalize); + } + + WeightedDiscreteRandomStreamTransform(std::vector &values, std::vector &probabilities, bool normalize = false) + { + valueTable = values; + InitializeAliasTable(probabilities, normalize); + } + + //pre-computes the alias tables given a probability distribution + // if normalize is true, then it will sum all probabilities and divide by the sum such that they sum to 1.0 + // if num_elements is nonzero, then it will preallocate that number of elements + void InitializeAliasTable(std::vector &probabilities, bool normalize) + { + if(normalize) + NormalizeProbabilities(probabilities); + + probabilityTable.resize(probabilities.size()); + aliasTable.resize(probabilities.size()); + + //separate values into smaller and larger than what a uniform distribution would yield + std::vector small_probs; + std::vector large_probs; + double uniform_probability = 1.0 / probabilities.size(); + for(size_t i = 0; i < probabilities.size(); i++) + { + if(probabilities[i] >= uniform_probability) + large_probs.push_back(i); + else + small_probs.push_back(i); + } + + //until have run out of probability + while(!small_probs.empty() && !large_probs.empty()) + { + size_t less = small_probs.back(); + small_probs.pop_back(); + size_t more = large_probs.back(); + large_probs.pop_back(); + + //scale probabilities so that 1.0 is the value that would be given for a uniform distribution + probabilityTable[less] = probabilities[less] * probabilities.size(); + aliasTable[less] = more; + + //adjust probabilities + probabilities[more] = probabilities[more] + probabilities[less] - uniform_probability; + + //if excess probability, put it on the respective list + if(probabilities[more] >= uniform_probability) + large_probs.push_back(more); + else + small_probs.push_back(more); + } + + ///use any remaining probability mass + while(!small_probs.empty()) + { + probabilityTable[small_probs.back()] = 1.0; + small_probs.pop_back(); + } + + while(!large_probs.empty()) + { + probabilityTable[large_probs.back()] = 1.0; + large_probs.pop_back(); + } + } + + //returns true if initialized + bool IsInitialized() + { + return aliasTable.size() > 0; + } + + //returns a value based on the value's probability mass + ValueType WeightedDiscreteRand(RandomStream &rs) + { + size_t bucket = (rs.RandUInt32() % probabilityTable.size()); + bool pick_alias = (rs.Rand() < probabilityTable[bucket]); + + size_t value_index = (pick_alias ? 
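+		//note: despite its name, pick_alias == true keeps the uniformly drawn bucket itself;
+		// when false, the draw falls through to that bucket's precomputed alias entry,
+		// which is what keeps each sample O(1) regardless of the number of values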
bucket : aliasTable[bucket]); + return valueTable[value_index]; + } + +private: + //which element is aliased with the current position + std::vector aliasTable; + + //probability of each element + std::vector probabilityTable; + + //the value corresponding to each element in probabilityTable + std::vector valueTable; +}; diff --git a/src/Amalgam/resource.h b/src/Amalgam/resource.h new file mode 100644 index 00000000..2d7667c1 Binary files /dev/null and b/src/Amalgam/resource.h differ diff --git a/src/Amalgam/string/StringInternPool.cpp b/src/Amalgam/string/StringInternPool.cpp new file mode 100644 index 00000000..d4a9b368 --- /dev/null +++ b/src/Amalgam/string/StringInternPool.cpp @@ -0,0 +1,118 @@ +//project headers: +#include "StringInternPool.h" + +StringInternPool string_intern_pool; + +const std::string &StringInternPool::GetStringFromID(StringInternPool::StringID id) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(sharedMutex); +#endif + + return idToStringAndRefCount[id].first; +} + +StringInternPool::StringID StringInternPool::GetIDFromString(const std::string &str) +{ +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(sharedMutex); +#endif + + auto id_iter = stringToID.find(str); + if(id_iter == end(stringToID)) + return NOT_A_STRING_ID; //the string was never entered in and don't want to cause more errors + + return id_iter->second; +} + +StringInternPool::StringID StringInternPool::CreateStringReference(const std::string &str) +{ + if(str.size() == 0) + return EMPTY_STRING_ID; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::WriteLock lock(sharedMutex); +#endif + + //try to insert it as a new string + auto [inserted_id, inserted] = stringToID.insert(std::make_pair(str, 0)); + if(inserted) + { + StringInternPool::StringID id; + //new string, see if any ids are ready for reuse + if(unusedIDs.size() > 0) + { + //reuse existing, so overwrite it + id = unusedIDs.top(); + unusedIDs.pop(); + idToStringAndRefCount[id] = std::make_pair(str, 1); + } + else //need a new one + { + id = idToStringAndRefCount.size(); + idToStringAndRefCount.emplace_back(std::make_pair(str, 1)); + } + + //store the id along with the string + inserted_id->second = id; + + return id; + } + + //found, so count the reference if applicable + StringInternPool::StringID id = inserted_id->second; + if(!IsStringIDStatic(id)) + idToStringAndRefCount[id].second++; + return id; +} + +StringInternPool::StringID StringInternPool::CreateStringReference(StringInternPool::StringID id) +{ + if(IsStringIDStatic(id)) + return id; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //only need a ReadLock because the count is atomic + Concurrency::ReadLock lock(sharedMutex); +#endif + IncrementRefCount(id); + + return id; +} + +void StringInternPool::DestroyStringReference(StringInternPool::StringID id) +{ + if(IsStringIDStatic(id)) + return; + + //get the reference count before decrement +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //make sure have a readlock first so that the idToStringAndRefCount vector heap location doesn't change + Concurrency::ReadLock lock(sharedMutex); +#endif + + int64_t refcount = DecrementRefCount(id); + + //if other references, then can't clear it; signed, so it won't wrap around + if(refcount > 1) + return; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //this thread is about to free the reference, but 
need to acquire a write lock + // so, keep the reference alive by incrementing it *before* attempting the write lock + IncrementRefCount(id); + + //grab a write lock + lock.unlock(); + Concurrency::WriteLock write_lock(sharedMutex); + + //with the write lock, decrement reference count in case this string should stay active + refcount = DecrementRefCount(id); + + //if other references, then can't clear it + if(refcount > 1) + return; +#endif + + RemoveId(id); +} diff --git a/src/Amalgam/string/StringInternPool.h b/src/Amalgam/string/StringInternPool.h new file mode 100644 index 00000000..f45134d4 --- /dev/null +++ b/src/Amalgam/string/StringInternPool.h @@ -0,0 +1,463 @@ +#pragma once + +//project headers: +#include "Concurrency.h" +#include "HashMaps.h" +#include "PlatformSpecific.h" + +//system headers: +#include +#include +#include + +//manages all strings so they can be referred and compared easily by integers, across threads +//depends on a method defined outside of this class, StringInternPool::InitializeStaticStrings() +// to set up all internal static strings; see the function's declaration for details +class StringInternPool +{ +public: + using StringID = size_t; + using StringToStringIDAssoc = FastHashMap; + + //indicates that it is not a string, like NaN or null + static constexpr size_t NOT_A_STRING_ID = 0; + static constexpr size_t EMPTY_STRING_ID = 1; + inline static const std::string EMPTY_STRING = std::string(""); + + inline StringInternPool() + { + InitializeStaticStrings(); + } + + //translates the id to a string, empty string if it does not exist + const std::string &GetStringFromID(StringID id); + + //translates the string to the corresponding ID, 0 is the empty string, maximum value of size_t means it does not exist + StringID GetIDFromString(const std::string &str); + + //makes a new reference to the string specified, returning the ID + StringID CreateStringReference(const std::string &str); + + //makes a new reference to the string id specified, returning the id passed in + StringID CreateStringReference(StringID id); + + //creates new references from the references container and function + template + inline void CreateStringReferences(ReferencesContainer &references_container, + GetStringIdFunction get_string_id = [](auto sid) { return sid; }) + { + if(references_container.size() == 0) + return; + + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //only need a ReadLock because the count is atomic + Concurrency::ReadLock lock(sharedMutex); + #endif + + for(auto r : references_container) + { + StringID id = get_string_id(r); + if(IsStringIDStatic(id)) + continue; + + IncrementRefCount(id); + } + } + + //creates additional_reference_count new references from the references container and function + // specialized for size_t indexed containers, where the index is desired + template + inline void CreateMultipleStringReferences(ReferencesContainer &references_container, + size_t additional_reference_count, + GetStringIdFunction get_string_id = [](auto sid) { return sid; }) + { + if(references_container.size() == 0) + return; + + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //only need a ReadLock because the count is atomic + Concurrency::ReadLock lock(sharedMutex); + #endif + + for(auto r : references_container) + { + StringID id = get_string_id(r); + if(IsStringIDStatic(id)) + continue; + + AdvanceRefCount(id, additional_reference_count); + } + } + + //creates new references from the references container and function + // 
specialized for size_t indexed containers, where the index is desired + template + inline void CreateStringReferencesByIndex(ReferencesContainer &references_container, + GetStringIdFunction get_string_id = [](auto sid) { return sid; }) + { + if(references_container.size() == 0) + return; + + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //only need a ReadLock because the count is atomic + Concurrency::ReadLock lock(sharedMutex); + #endif + + for(size_t i = 0; i < references_container.size(); i++) + { + StringID id = get_string_id(references_container[i], i); + if(IsStringIDStatic(id)) + continue; + + IncrementRefCount(id); + } + } + + //removes a reference to the string specified by the ID + void DestroyStringReference(StringID id); + + //creates new references from the references container and function + template + inline void DestroyStringReferences(ReferencesContainer &references_container, + GetStringIdFunction get_string_id = [](auto sid) { return sid; }) + { + #if !defined(MULTITHREAD_SUPPORT) && !defined(MULTITHREAD_INTERFACE) + for(auto r : references_container) + DestroyStringReference(get_string_id(r)); + #else + if(references_container.size() == 0) + return; + + //only need a ReadLock because the count is atomic + Concurrency::ReadLock lock(sharedMutex); + + //as it goes through, if any id needs removal, will set this to true so that + // removal can be done after refernce count decreases are done + bool ids_need_removal = false; + + for(auto r : references_container) + { + StringID id = get_string_id(r); + if(IsStringIDStatic(id)) + continue; + + int64_t refcount = DecrementRefCount(id); + + //if extra references, just return, but if it is 1, then it will try to clear + if(refcount == 1) + ids_need_removal = true; + } + + if(!ids_need_removal) + return; + + //need to remove at least one reference, so put all counts back while wait for write lock + for(auto r : references_container) + { + StringID id = get_string_id(r); + if(!IsStringIDStatic(id)) + IncrementRefCount(id); + } + + //grab a write lock + lock.unlock(); + Concurrency::WriteLock write_lock(sharedMutex); + + for(auto r : references_container) + { + StringID id = get_string_id(r); + if(IsStringIDStatic(id)) + continue; + + //remove any that are the last reference + int64_t refcount = DecrementRefCount(id); + if(refcount == 1) + RemoveId(id); + } + + #endif + } + + //returns the number of strings that are still allocated + //even when "empty" it will still return 2 since the NOT_A_STRING_ID and EMPTY_STRING_ID take up slots + inline size_t GetNumStringsInUse() + { return stringToID.size(); } + + //returns the number of non-static strings that are still in use + size_t GetNumDynamicStringsInUse() + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(sharedMutex); + #endif + + size_t count = 0; + for(const auto &it : stringToID) + { + if(!IsStringIDStatic(it.second)) + count++; + } + return count; + } + + //returns the number of non-static string references that are currently in use + int64_t GetNumNonStaticStringReferencesInUse() + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(sharedMutex); + #endif + + int64_t count = 0; + for(size_t id = 0; id < idToStringAndRefCount.size(); id++) + { + if(!IsStringIDStatic(id)) + count += idToStringAndRefCount[id].second; + } + return count; + } + + //returns a vector of all the strings still in use. Intended for debugging. 
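+	//for example, a hypothetical leak check at shutdown (illustrative only, not part of this header):
+	//  for(auto &s : string_intern_pool.GetNonStaticStringsInUse())
+	//    std::cerr << "string still referenced: " << s << std::endl;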
+ std::vector GetNonStaticStringsInUse() + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadLock lock(sharedMutex); + #endif + + std::vector in_use; + for(size_t id = 0; id < idToStringAndRefCount.size(); id++) + { + if(!IsStringIDStatic(id) && idToStringAndRefCount[id].second > 0) + in_use.push_back(idToStringAndRefCount[id].first); + } + return in_use; + } + + //returns true if the string associated with stringID id is a static string + constexpr bool IsStringIDStatic(StringID id) + { + return id < numStaticStrings; //static strings must begin at id 0, so numStaticStrings represents the first string id that is not static + } + +protected: + + //increments the reference count and returns the previous reference count + inline int64_t IncrementRefCount(StringID id) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //perform an atomic increment so that it can be done under a read lock + //TODO 15993: once C++20 is widely supported, change type to atomic_ref + return reinterpret_cast&>(idToStringAndRefCount[id].second).fetch_add(1); + #else + return idToStringAndRefCount[id].second++; + #endif + } + + //adds advancement to the reference count + inline void AdvanceRefCount(StringID id, size_t advancement) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //perform an atomic increment so that it can be done under a read lock + //TODO 15993: once C++20 is widely supported, change type to atomic_ref + reinterpret_cast&>(idToStringAndRefCount[id].second).fetch_add(advancement); + #else + idToStringAndRefCount[id].second += advancement; + #endif + } + + //decrements the reference count and returns the previous reference count + inline int64_t DecrementRefCount(StringID id) + { + #if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + //perform an atomic decrement so that it can be done under a read lock + //TODO 15993: once C++20 is widely supported, change type to atomic_ref + return reinterpret_cast&>(idToStringAndRefCount[id].second).fetch_sub(1); + #else + return idToStringAndRefCount[id].second--; + #endif + } + + //removes everything associated with the id + inline void RemoveId(StringID id) + { + //removed last reference; clear the string and free memory + stringToID.erase(idToStringAndRefCount[id].first); + idToStringAndRefCount[id].first = ""; + idToStringAndRefCount[id].first.shrink_to_fit(); + unusedIDs.push(id); + } + + //must be defined outside of this class and initialize all static strings + //needs to set numStaticStrings and call EmplaceStaticString for each StringID from 0 up to numStaticStrings + // with the respective string + //the first two strings MUST be not-a-string followed by empty string + void InitializeStaticStrings(); + + //sets string id sid to str, assuming the position has already been allocated in idToStringAndRefCount + inline void EmplaceStaticString(StringID sid, const char *str) + { + idToStringAndRefCount[sid] = std::make_pair(str, 0); + stringToID.emplace(str, sid); + } + + //mapping from ID (index) to the string and the number of references + //use a signed counter in case it goes negative such that comparisons work well even if multiple threads have freed it + std::vector> idToStringAndRefCount; + + //mapping from string to ID (index of idToStringAndRefCount) + StringToStringIDAssoc stringToID; + + //IDs (indexes of idToStringAndRefCount) that are now unused + std::priority_queue, std::greater > unusedIDs; + + //number of static strings + size_t 
numStaticStrings; + +#if defined(MULTITHREAD_SUPPORT) || defined(MULTITHREAD_INTERFACE) + Concurrency::ReadWriteMutex sharedMutex; +#endif +}; + +extern StringInternPool string_intern_pool; + +//A reference to a string +//maintains reference counts and will clear upon destruction +class StringInternRef +{ +public: + constexpr StringInternRef() : id(StringInternPool::NOT_A_STRING_ID) + { } + + inline StringInternRef(StringInternPool::StringID sid) + { + id = string_intern_pool.CreateStringReference(sid); + } + + inline StringInternRef(const std::string &str) + { + id = string_intern_pool.CreateStringReference(str); + } + + //copy constructor + inline StringInternRef(const StringInternRef &sir) + { + id = string_intern_pool.CreateStringReference(sir.id); + } + + inline ~StringInternRef() + { + string_intern_pool.DestroyStringReference(id); + } + + //easy-to-read way of creating an empty string + inline static StringInternRef EmptyString() + { return StringInternRef(); } + + //assign another string reference + inline StringInternRef &operator =(const StringInternRef &sir) + { + if(id != sir.id) + { + string_intern_pool.DestroyStringReference(id); + id = string_intern_pool.CreateStringReference(sir.id); + } + return *this; + } + + //allow being able to use as a string + inline operator const std::string &() + { + return string_intern_pool.GetStringFromID(id); + } + + //allow being able to use as a string id + constexpr operator StringInternPool::StringID() + { + return id; + } + + //call this to set the id and create a reference + inline void SetIDAndCreateReference(StringInternPool::StringID sid) + { + //if changing id, need to delete previous + if(id > string_intern_pool.EMPTY_STRING_ID && id != sid) + string_intern_pool.DestroyStringReference(id); + + if(id != sid) + { + id = sid; + string_intern_pool.CreateStringReference(id); + } + } + + //only call this when the sid already has a reference and this is being used to manage it + inline void SetIDWithReferenceHandoff(StringInternPool::StringID sid) + { + if(id > string_intern_pool.EMPTY_STRING_ID) + { + //if the ids are different, then need to delete old + //if the ids are the same, then have a duplicate reference, so need to delete one + //so delete a reference either way + string_intern_pool.DestroyStringReference(id); + } + + id = sid; + } + +private: + + StringInternPool::StringID id; +}; + +//A weak reference to a string +// When the string does not exist, it will take on the value of the empty string +class StringInternWeakRef +{ +public: + constexpr StringInternWeakRef() + : id(StringInternPool::NOT_A_STRING_ID) + { } + + constexpr StringInternWeakRef(StringInternPool::StringID sid) + : id(sid) + { } + + StringInternWeakRef(const std::string &str) + { + id = string_intern_pool.GetIDFromString(str); + } + + constexpr StringInternWeakRef(const StringInternWeakRef &siwr) + : id(siwr.id) + { } + + //easy-to-read way of creating an empty string + inline static StringInternRef EmptyString() + { + return StringInternRef(); + } + + //allow being able to use as a string + inline operator const std::string &() + { + return string_intern_pool.GetStringFromID(id); + } + + //allow being able to use as a string id + constexpr operator StringInternPool::StringID() + { + return id; + } + + //only call this when the sid already has a reference and this is being used to manage it + constexpr void SetID(StringInternPool::StringID sid) + { + id = sid; + } + +private: + + StringInternPool::StringID id; +}; diff --git 
a/src/Amalgam/string/StringManipulation.cpp b/src/Amalgam/string/StringManipulation.cpp new file mode 100644 index 00000000..94faec1b --- /dev/null +++ b/src/Amalgam/string/StringManipulation.cpp @@ -0,0 +1,198 @@ +//project headers: +#include "StringManipulation.h" + +#include "FastMath.h" + +//3rd party headers: +#include "swiftdtoa/SwiftDtoa.h" + +//system headers: +#include + +std::string StringManipulation::NumberToString(double value) +{ + //first check for unusual values + if(FastIsNaN(value)) + return ".nan"; + if(value == std::numeric_limits::infinity()) + return ".infinity"; + if(value == -std::numeric_limits::infinity()) + return "-.infinity"; + + char char_buffer[128]; + size_t num_chars_written = swift_dtoa_optimal_double(value, &char_buffer[0], sizeof(char_buffer)); + return std::string(&char_buffer[0], num_chars_written); +} + +std::string StringManipulation::NumberToString(size_t value) +{ + //do this our own way because regular string manipulation libraries are slow and measurably impact performance + constexpr size_t max_num_digits = std::numeric_limits::digits / 3; //max of binary digits per character + constexpr size_t buffer_size = max_num_digits + 2; + char buffer[buffer_size]; + char *p = &buffer[0]; + + if(value == 0) //check for zero because it's a very common case for integers + *p++ = '0'; + else //convert each character + { + //peel off digits and put them in the next position for the string (reverse when done) + char *buffer_start = &buffer[0]; + while(value != 0) + { + //pull off the least significant digit and convert it to a number character + *p++ = ('0' + (value % 10)); + value /= 10; + } + + //put back in original order + std::reverse(buffer_start, p); + } + *p = '\0'; //terminate string + return std::string(&buffer[0]); +} + +std::string StringManipulation::RemoveFirstWord(std::string &str) +{ + std::string first_token; + size_t spacepos = str.find(' '); + if(spacepos == std::string::npos) + { + first_token = str; + str = ""; + } + else + { + first_token = str.substr(0, spacepos); + str = str.substr(spacepos + 1); + } + return first_token; +} + +std::string StringManipulation::BinaryStringToBase16(std::string &binary_string) +{ + std::string base16_string; + base16_string.resize(2 * binary_string.size()); + for(size_t i = 0; i < binary_string.size(); i++) + { + uint8_t value = binary_string[i]; + base16_string[2 * i] = base16Chars[value >> 4]; + base16_string[2 * i + 1] = base16Chars[value & 15]; + } + + return base16_string; +} + +std::string StringManipulation::Base16ToBinaryString(std::string &base16_string) +{ + std::string binary_string; + binary_string.resize(base16_string.size() / 2); + for(size_t i = 0; i < base16_string.size(); i += 2) + { + uint8_t value = (Base16CharToVal(base16_string[i]) << 4); + value += Base16CharToVal(base16_string[i + 1]); + binary_string[i / 2] = value; + } + + return binary_string; +} + +std::string StringManipulation::BinaryStringToBase64(std::string &binary_string) +{ + size_t binary_len = binary_string.size(); + size_t full_triples = binary_len / 3; + + std::string base64_string; + //resize triples to quads + base64_string.reserve((full_triples + 2) * 4); + + //encode all groups of 3 + for(size_t i = 0; i + 3 <= binary_len; i += 3) + { + auto encoded_quad = Base64ThreeBytesToFourChars(binary_string[i], + binary_string[i + 1], binary_string[i + 2]); + base64_string.append(begin(encoded_quad), end(encoded_quad)); + } + + //clean up any characters that aren't divisible by 3, + // zero fill the remaining bytes, and pad 
with '=' characters per standard + size_t chars_beyond_triplets = binary_len - full_triples * 3; + if(chars_beyond_triplets == 2) + { + auto encoded_quad = Base64ThreeBytesToFourChars(binary_string[binary_len - 2], + binary_string[binary_len - 1], 0); + + base64_string.push_back(encoded_quad[0]); + base64_string.push_back(encoded_quad[1]); + base64_string.push_back(encoded_quad[2]); + base64_string.push_back('='); + } + else if(chars_beyond_triplets == 1) + { + auto encoded_quad = Base64ThreeBytesToFourChars(binary_string[binary_len - 1], 0, 0); + + base64_string.push_back(encoded_quad[0]); + base64_string.push_back(encoded_quad[1]); + base64_string.push_back('='); + base64_string.push_back('='); + } + + return base64_string; +} + +std::string StringManipulation::Base64ToBinaryString(std::string &base64_string) +{ + size_t base64_len = base64_string.size(); + + if(base64_len == 0) + return std::string(); + + //if the length isn't divisible by 4, then resize down + if((base64_len % 4) != 0) + { + base64_len = (base64_len * 4) / 4; + base64_string.resize(base64_len); + } + + //exclude last quad, because don't know if it is full + // in case it has any padding via '=' character and will need special logic + size_t known_full_quads = (base64_len / 4) - 1; + + std::string binary_string; + //resize quads to triples + binary_string.reserve( ((known_full_quads + 2) * 3) / 4); + + //iterate over quads, but don't use <= because don't want to include last quad, + // same reasoning as known_full_quads + for(size_t i = 0; i + 4 < base64_len; i += 4) + { + auto triplet = Base64FourCharsToThreeBytes(base64_string[i], + base64_string[i + 1], base64_string[i + 2], base64_string[i + 3]); + binary_string.append(begin(triplet), end(triplet)); + } + + size_t last_quad_start = known_full_quads * 4; + + if(base64_string[last_quad_start + 2] == '=') + { + auto triplet = Base64FourCharsToThreeBytes(base64_string[last_quad_start], + base64_string[last_quad_start + 1], 'A', 'A'); + binary_string.push_back(triplet[0]); + } + else if(base64_string[last_quad_start + 3] == '=') + { + auto triplet = Base64FourCharsToThreeBytes(base64_string[last_quad_start], + base64_string[last_quad_start + 1], base64_string[last_quad_start + 2], 'A'); + binary_string.push_back(triplet[0]); + binary_string.push_back(triplet[1]); + } + else //last quad is full + { + auto triplet = Base64FourCharsToThreeBytes(base64_string[last_quad_start], + base64_string[last_quad_start + 1], base64_string[last_quad_start + 2], + base64_string[last_quad_start + 3]); + binary_string.append(begin(triplet), end(triplet)); + } + + return binary_string; +} diff --git a/src/Amalgam/string/StringManipulation.h b/src/Amalgam/string/StringManipulation.h new file mode 100644 index 00000000..5cd602fb --- /dev/null +++ b/src/Amalgam/string/StringManipulation.h @@ -0,0 +1,422 @@ +#pragma once + +//system headers: +#include +#include +#include +#include + +namespace StringManipulation +{ + //converts a number into a string quickly and accurately (moreso than built-in C++ libraries) + std::string NumberToString(double value); + std::string NumberToString(size_t value); + + //removes the first word from str and return the removed word + std::string RemoveFirstWord(std::string &str); + + //returns true if the character in the string s starting at position is whitespace + inline bool IsUtf8Whitespace(std::string &s, size_t position) + { + auto cur_char = s[position]; + if(cur_char == ' ' || cur_char == '\t' || cur_char == '\n' || cur_char == '\r' + || cur_char == '\f' || 
cur_char == '\v') + return true; + + //need to additionally check the following multicharacter utf-8 code points: + //name hex dec bytes + // no - break space U + 00A0 160 0xC2 0xA0 + // ogham space mark U + 1680 5760 0xE1 0x9A 0x80 + // en quad U + 2000 8192 0xE2 0x80 0x80 + // em quad U + 2001 8193 0xE2 0x80 0x81 + // en space U + 2002 8194 0xE2 0x80 0x82 + // em space U + 2003 8195 0xE2 0x80 0x83 + // three - per - em space U + 2004 8196 0xE2 0x80 0x84 + // four - per - em space U + 2005 8197 0xE2 0x80 0x85 + // six - per - em space U + 2006 8198 0xE2 0x80 0x86 + // figure space U + 2007 8199 0xE2 0x80 0x87 + // punctuation space U + 2008 8200 0xE2 0x80 0x88 + // thin space U + 2009 8201 0xE2 0x80 0x89 + // hair space U + 200A 8202 0xE2 0x80 0x8A + // line separator U + 2028 8232 0xE2 0x80 0xA8 + // paragraph separator U + 2029 8233 0xE2 0x80 0xA9 + // narrow no - break space U + 202F 8239 0xE2 0x80 0xAF + // medium mathematical space U + 205F 8287 0xE2 0x81 0x9F + // ideographic space U + 3000 12288 0xE3 0x80 0x80 + + if(position + 2 >= s.size()) + return false; + + if(static_cast(cur_char) == 0xC2 && static_cast(s[position + 1]) == 0xA0) + return true; + + //need 3 characters for the remaining + if(position + 3 >= s.size()) + return false; + + if(static_cast(cur_char) == 0xE1 && static_cast(s[position + 1]) == 0x9A + && static_cast(s[position + 2]) == 0x80) + return true; + + if(static_cast(cur_char) == 0xE2) + { + if(static_cast(s[position + 1]) == 0x80) + { + uint8_t third_char = s[position + 2]; + if(third_char >= 0x80 && third_char <= 0xAF) + return true; + } + else if(static_cast(s[position + 1]) == 0x81 && static_cast(s[position + 2]) == 0x9F) + { + return true; + } + } + + if(static_cast(cur_char) == 0xE3 && static_cast(s[position + 1]) == 0x80 + && static_cast(s[position + 2]) == 0x80) + return true; + + return false; + } + + //returns true if the character in the string s starting at position is a newline + inline bool IsUtf8Newline(std::string &s, size_t position) + { + auto cur_char = s[position]; + //don't count carriage returns (\r) as new lines, since it just moves the cursor + if(cur_char == '\n' || cur_char == '\v' || cur_char == '\f') + return true; + + if(position + 3 < s.size()) + { + if(static_cast(cur_char) == 0xE2) + { + //line separator + if(static_cast(s[position + 1]) == 0x80 && static_cast(s[position + 2]) == 0xA8) + return true; + //paragraph separator + else if(static_cast(s[position + 1]) == 0x80 && static_cast(s[position + 2]) == 0xA9) + return true; + } + } + + return false; + } + + //returns the length of the UTF-8 character in s starting at the specified offset + inline size_t GetUTF8CharacterLength(std::string_view s, size_t offset = 0) + { + if(offset >= s.size()) + return 0; + + //there's at least one byte left + size_t remaining_length = s.size() - offset; + + uint8_t first_byte = s[offset]; + + //0xxxxxxx means 1 byte in UTF-8 standard + if((first_byte & 0x80) == 0x00) + return 1; + + //110xxxxx means 2 bytes in UTF-8 standard + if((first_byte & 0xE0) == 0xC0) + return std::min(2, remaining_length); + + //1110xxxx means 3 bytes in UTF-8 standard + if((first_byte & 0xF0) == 0xE0) + return std::min(3, remaining_length); + + //11110xxx means 4 bytes in UTF-8 standard + if((first_byte & 0xF8) == 0xF0) + return std::min(4, remaining_length); + + //else invalid UTF-8, just return one byte + return 1; + } + + //returns the number of UTF8 characters in the string + inline size_t GetNumUTF8Characters(std::string_view s) + { + size_t offset = 0; + size_t 
next_offset = 0; + size_t num_chars = 0; + do + { + next_offset = GetUTF8CharacterLength(s, offset); + offset += next_offset; + if(next_offset != 0) + num_chars++; + } while(next_offset != 0); + + return num_chars; + } + + //for s, finds the offset of the last utf8 character and its length + inline std::pair GetLastUTF8CharacterOffsetAndLength(std::string_view s) + { + //walk along the utf8 string until find the last character + size_t offset = 0; + size_t end_offset = 0 + GetUTF8CharacterLength(s, 0); + while(end_offset < s.size()) + { + size_t next_length = GetUTF8CharacterLength(s, end_offset); + if(next_length == 0) + break; + + offset = end_offset; + end_offset = offset + next_length; + } + + size_t length = end_offset - offset; + return std::make_pair(offset, length); + } + + //returns the offset of the nth utf8 character in the specified string + // if the string does not have that many characters, then it will return the size of the string + inline size_t GetNthUTF8CharacterOffset(std::string_view s, size_t nth) + { + size_t offset = 0; + for(size_t i = 0; i < nth; i++) + { + size_t len = GetUTF8CharacterLength(s, offset); + if(len == 0) + break; + + offset += len; + } + + return offset; + } + + //returns the offset of the nth last utf8 character in the specified string + // if the string does not have that many characters, then it will return the size of the string + inline size_t GetNthLastUTF8CharacterOffset(std::string_view s, size_t nth) + { + size_t num_utf8_chars = GetNumUTF8Characters(s); + + //if past the end, just return the end + if(nth >= num_utf8_chars) + return s.size(); + + //reflect from the end + nth = num_utf8_chars - nth; + + return GetNthUTF8CharacterOffset(s, nth); + } + + //expands the utf8 string s into each character in exploded + inline void ExplodeUTF8Characters(std::string_view s, std::vector &exploded) + { + exploded.clear(); + + size_t utf8_char_start_offset = 0; + while(utf8_char_start_offset < s.size()) + { + size_t utf8_char_length = StringManipulation::GetUTF8CharacterLength(s, utf8_char_start_offset); + //done if no more characters + if(utf8_char_length == 0) + break; + + //there's at least one character, but copy out each character in the string + uint32_t value = s[utf8_char_start_offset]; + for(size_t i = 1; i < utf8_char_length; i++) + { + value <<= 8; + value |= s[utf8_char_start_offset + i]; + } + exploded.push_back(value); + + utf8_char_start_offset += utf8_char_length; + } + } + + //concatenates utf8 characters into utf8 string, opposite of ExplodeUTF8Characters + inline std::string ConcatUTF8Characters(std::vector &chars) + { + std::string result; + result.reserve(chars.size()); + + //for each character, concatenate any parts that fit + for(auto c : chars) + { + if(c > 0xFFFFFF) + { + result.push_back(c >> 24); + c &= 0xFFFFFF; + } + + if(c > 0xFFFF) + { + result.push_back(c >> 16); + c &= 0xFFFF; + } + + if(c > 0xFF) + { + result.push_back(c >> 8); + c &= 0xFF; + } + + result.push_back(c); + } + + return result; + } + + template + inline std::string To1ByteString(SourceType value) + { + std::string string_value(1, '\0'); + string_value[0] = reinterpret_cast(value); + return string_value; + } + + template + inline std::string To2ByteStringLittleEndian(SourceType value) + { + std::string string_value(2, '\0'); + uint16_t to_write = reinterpret_cast(value); + string_value[0] = static_cast(to_write & 255); + string_value[1] = static_cast((to_write >> 8) & 255); + return string_value; + } + + template + inline std::string 
To2ByteStringBigEndian(SourceType value) + { + std::string string_value(2, '\0'); + uint16_t to_write = reinterpret_cast(value); + string_value[1] = static_cast(to_write & 255); + string_value[0] = static_cast((to_write >> 8) & 255); + return string_value; + } + + template + inline std::string To4ByteStringLittleEndian(SourceType value) + { + std::string string_value(4, '\0'); + uint32_t to_write = reinterpret_cast(value); + string_value[0] = static_cast(to_write & 255); + string_value[1] = static_cast((to_write >> 8) & 255); + string_value[2] = static_cast((to_write >> 16) & 255); + string_value[3] = static_cast((to_write >> 24) & 255); + return string_value; + } + + template + inline std::string To4ByteStringBigEndian(SourceType value) + { + std::string string_value(4, '\0'); + uint32_t to_write = reinterpret_cast(value); + string_value[3] = static_cast(to_write & 255); + string_value[2] = static_cast((to_write >> 8) & 255); + string_value[1] = static_cast((to_write >> 16) & 255); + string_value[0] = static_cast((to_write >> 24) & 255); + return string_value; + } + + template + inline std::string To8ByteStringLittleEndian(SourceType value) + { + std::string string_value(8, '\0'); + uint64_t to_write = reinterpret_cast(value); + string_value[0] = static_cast(to_write & 255); + string_value[1] = static_cast((to_write >> 8) & 255); + string_value[2] = static_cast((to_write >> 16) & 255); + string_value[3] = static_cast((to_write >> 24) & 255); + string_value[4] = static_cast((to_write >> 32) & 255); + string_value[5] = static_cast((to_write >> 40) & 255); + string_value[6] = static_cast((to_write >> 48) & 255); + string_value[7] = static_cast((to_write >> 56) & 255); + return string_value; + } + + template + inline std::string To8ByteStringBigEndian(SourceType value) + { + std::string string_value(8, '\0'); + uint64_t to_write = reinterpret_cast(value); + string_value[7] = static_cast(to_write & 255); + string_value[6] = static_cast((to_write >> 8) & 255); + string_value[5] = static_cast((to_write >> 16) & 255); + string_value[4] = static_cast((to_write >> 24) & 255); + string_value[3] = static_cast((to_write >> 32) & 255); + string_value[2] = static_cast((to_write >> 40) & 255); + string_value[1] = static_cast((to_write >> 48) & 255); + string_value[0] = static_cast((to_write >> 56) & 255); + return string_value; + } + + //converts a single Base16 character into a binary nibble value + constexpr uint8_t Base16CharToVal(char c) + { + if(c >= '0' && c <= '9') + return c - '0'; + if(c >= 'a' && c <= 'f') + return 10 + c - 'a'; + if(c >= 'A' && c <= 'F') + return 10 + c - 'A'; + + return 0; + } + + //encodes the binary_string with Base16 and returns the new string + std::string BinaryStringToBase16(std::string &binary_string); + + //decodes the Base16 string and returns the binary string + std::string Base16ToBinaryString(std::string &base16_string); + + //converts a single Base64 character into a binary 6-bit value + constexpr uint8_t Base64CharToVal(char c) + { + if(c >= 'A' && c <= 'Z') + return c - 'A'; + if(c >= 'a' && c <= 'z') + return 26 + c - 'a'; + if(c >= '0' && c <= '9') + return 52 + c - '0'; + if(c == '+') + return 62; + if(c == '/') + return 63; + + return 0; + } + + //encodes the binary_string with Base64 and returns the new string + std::string BinaryStringToBase64(std::string &binary_string); + + //decodes the Base64 string and returns the binary string + std::string Base64ToBinaryString(std::string &base64_string); + + static const std::string base16Chars = 
"0123456789abcdef"; + static const std::string base64Chars + = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + //converts 3 binary bytes into 4 chars for Base64 encoding + inline std::array Base64ThreeBytesToFourChars(uint8_t a, uint8_t b, uint8_t c) + { + uint32_t value_of_triple = ((a << 16) | (b << 8) | c); + + //extract each group of 6 bits + char char1 = base64Chars[(value_of_triple >> 18) & 63]; + char char2 = base64Chars[(value_of_triple >> 12) & 63]; + char char3 = base64Chars[(value_of_triple >> 6) & 63]; + char char4 = base64Chars[value_of_triple & 63]; + return { char1, char2, char3, char4 }; + } + + //converts 4 chars into 3 binary bytes for Base64 encoding + inline std::array Base64FourCharsToThreeBytes(char a, char b, char c, char d) + { + std::uint32_t value_of_quad = ( (Base64CharToVal(a) << 18) + | (Base64CharToVal(b) << 12) + | (Base64CharToVal(c) << 6) + | Base64CharToVal(d) ); + + uint8_t value1 = (value_of_quad >> 16) & 255; + uint8_t value2 = (value_of_quad >> 8) & 255; + uint8_t value3 = value_of_quad & 255; + return { value1, value2, value3 }; + } +}; diff --git a/test/lib_smoke_test/main.cpp b/test/lib_smoke_test/main.cpp new file mode 100644 index 00000000..9789f8a6 --- /dev/null +++ b/test/lib_smoke_test/main.cpp @@ -0,0 +1,31 @@ +// +// Test driver for Amalgam shared libraries (dll/so/dylib) +// + +//project headers: +#include "Amalgam.h" + +//system headers: +#include +#include + +int main(int argc, char* argv[]) +{ + // Print version: + std::cout << std::string(GetVersionString()) << std::endl; + + // Load+execute+delete entity: + char handle[] = "1"; + char* file = (argc > 1) ? argv[1] : (char*)"test.amlg"; + char write_log[] = ""; + char print_log[] = ""; + if(LoadEntity(handle, file, false, true, write_log, print_log)) + { + char label[] = "test"; + ExecuteEntity(handle, label); + DeleteEntity(handle); + return 0; + } + + return 1; +} \ No newline at end of file diff --git a/test/lib_smoke_test/test.amlg b/test/lib_smoke_test/test.amlg new file mode 100644 index 00000000..4ebacb6b --- /dev/null +++ b/test/lib_smoke_test/test.amlg @@ -0,0 +1,63 @@ +; +; Test amlg file for test driver to test amalgam libraries +; + +(seq + (map + (lambda (create_entities + (concat "case_" (target_index)) + (zip_labels (list "A" "B" "N") (target_value)) + )) + (list + (list 1 .7 "A") + (list 1.1 .6 "A") + (list .6 .4 "A") + (list .7 .5 "A" ) + (list 1.5 1.5 "A" ) + (list 2.1 1.6 "B") + (list 2.6 2.7 "B") + (list 3.6 3.1 "A") + (list 2 1 "B") + (list 2.1 1.1 "B") + (list 2.6 1.1 "A") + (list 3.1 1.2 "B") + (list 1.4 1.7 "B") + (list 2.1 1.6 "C") + (list 2.5 2.4 "C") + (list 2.9 2.8 "C") + (list 4.5 5.0 "B") + (list 5.1 6.2 "C") + (list 6.9 7.1 "C") + (list 8.6 7.4 "C") + (list 6 3.7 "C") + (list 6.8 4.1 "C") + (list 7.5 4.8 "C") + (list 8.6 5.4 "C") + ) + ) + + (print + (compute_on_contained_entities (list + (query_nearest_generalized_distance + 5 + (list "A" "B" "N") + (list 2 3 "A") + (null) ;weights + (assoc "A" "continuous" "B" "continuous" "N" "nominal") + (assoc + "A" 9.0 + "B" 8.0 + "N" 3 + ) + (null) ;deviations + 0.1 ; p + -1 ;dwe + (null) + "fixed rand seed" + (null) + "precise" + (true) + ) + )) + ) +)