From 2593cf38df56482a58836af6c6216ad0c3abced8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Hordy=C5=84ski?= Date: Wed, 20 Mar 2024 14:47:43 +0100 Subject: [PATCH 1/3] Pypi setup --- LICENSE | 21 +++++++++++++++++++++ LICENSE.md | 1 - setup.cfg | 26 +++++++------------------- 3 files changed, 28 insertions(+), 20 deletions(-) create mode 100644 LICENSE delete mode 100644 LICENSE.md diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a983c936 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 deepsense.ai + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index 23e48d2f..00000000 --- a/LICENSE.md +++ /dev/null @@ -1 +0,0 @@ -Proprietary License. \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 090c3622..6d38271a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,26 +2,24 @@ name = dbally # do not change version by hand: use bump_version.sh version = file: src/dbally/VERSION -description = "deepsense.ai project" +description = "Efficient, consistent and secure library for querying structured data with natural language" author = deepsense.ai author_email = contact@deepsense.ai -license = Other/Proprietary License -license_files = LICENSE.md +license = MIT +license_files = LICENSE classifiers = - Development Status :: 1 - Planning + Development Status :: 2 - Pre-Alpha Environment :: Console - Environment :: GPU :: NVIDIA CUDA - Intended Audience :: Science/Research + Intended Audience :: Developers License :: Other/Proprietary License Natural Language :: English - Operating System :: Independent Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.12 - Topic :: AI - Private :: Do Not Upload +project_urls = + Documentation = https://db-ally.deepsense.ai/ [options] package_dir = @@ -50,16 +48,6 @@ examples = pydantic~=2.6.0 pydantic_settings~=2.1.0 psycopg2-binary~=2.9.9 -benchmark = - asyncpg~=0.28.0 - eval-type-backport~=0.1.3 - hydra-core~=1.3.2 - loguru~=0.7.0 - neptune~=1.6.3 - pydantic~=2.6.1 - pydantic-core~=2.16.2 - pydantic-settings~=2.0.3 - psycopg2-binary~=2.9.9 [options.packages.find] where = src From 4278360831aeeb5f85fa6f647bcfc7bee5529d1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Hordy=C5=84ski?= Date: Wed, 3 Apr 2024 12:55:22 +0200 Subject: [PATCH 2/3] gh prep --- {.gitlab/ci => .github/scripts}/build_docs.sh | 0 .github/workflows/ci.yml | 199 ++++ .github/workflows/documentation.yml | 36 + .gitlab/ci/terraform_apply.sh | 18 - LICENSE | 21 - data/schemas/superhero.sql | 80 -- data/superhero.json | 978 ------------------ examples/__init__.py | 0 examples/config.py | 20 - examples/paths.py | 6 - examples/superhero_example.py | 290 ------ infra/terraform/main.tf | 49 - infra/terraform/modules/database/main.tf | 60 -- infra/terraform/providers.tf | 14 - infra/terraform/variables.tf | 20 - 15 files changed, 235 insertions(+), 1556 deletions(-) rename {.gitlab/ci => .github/scripts}/build_docs.sh (100%) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/documentation.yml delete mode 100755 .gitlab/ci/terraform_apply.sh delete mode 100644 LICENSE delete mode 100644 data/schemas/superhero.sql delete mode 100644 data/superhero.json delete mode 100644 examples/__init__.py delete mode 100644 examples/config.py delete mode 100644 examples/paths.py delete mode 100644 examples/superhero_example.py delete mode 100644 infra/terraform/main.tf delete mode 100644 infra/terraform/modules/database/main.tf delete mode 100644 infra/terraform/providers.tf delete mode 100644 infra/terraform/variables.tf diff --git a/.gitlab/ci/build_docs.sh b/.github/scripts/build_docs.sh similarity index 100% rename from .gitlab/ci/build_docs.sh rename to .github/scripts/build_docs.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..c7b5353f --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,199 @@ +name: Continuous Integration + +on: + push: + branches: [main, master] + pull_request: + +jobs: + lints: + name: Run linters + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + checks: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + + - name: Cache pre-commit + uses: actions/cache@v3 + with: + path: ~/.cache/pre-commit + key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }} + + - name: Install pre-commit + run: pip3 install pre-commit + + - name: Run pre-commit checks + run: pre-commit run --all-files --show-diff-on-failure --color always + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + scan-type: "fs" + ignore-unfixed: true + exit-code: 0 # change if you want to fail build on vulnerabilities + severity: "CRITICAL,HIGH,MEDIUM" + format: "table" + output: "trivy-scanning-results.txt" + + - name: Format trivy message + run: | + echo "Trivy scanning results." >> trivy.txt + cat trivy-scanning-results.txt >> trivy.txt + + - name: Add trivy report to PR + uses: thollander/actions-comment-pull-request@v2 + continue-on-error: true + if: ${{ github.event_name == 'pull_request' }} + with: + filePath: trivy.txt + reactions: "" + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + comment_tag: trivy + + - name: Create venv + run: . ./setup_dev_env.sh + + - name: Check licenses + run: ./check_licenses.sh + + - name: Generate pip freeze + run: | + source venv/bin/activate + pip freeze > requirements-freeze.txt + + - name: Publish Artefacts + uses: actions/upload-artifact@v3 + if: always() + continue-on-error: true + with: + name: results + path: | + requirements-freeze.txt + licenses.txt + trivy-scanning-results.txt + retention-days: 30 + + - name: Publish Test Report + uses: actions/upload-artifact@v3 + if: always() + continue-on-error: true + with: + name: test-report + path: report.xml + retention-days: 10 + + - name: Validate package build + run: | + source venv/bin/activate + python -m pip install -U build + python -m build + + - name: Publish Package + uses: actions/upload-artifact@v3 + continue-on-error: true + if: success() + with: + name: packages + path: dist/** + retention-days: 3 + + tests: + name: Run tests + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + checks: write + pull-requests: write + contents: write # required for advanced coverage reporting (to keep branch) + strategy: + fail-fast: false # do not stop all jobs if one fails + matrix: + include: + - python-version: "3.11" + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache Dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements-dev.txt') }}-${{ hashFiles('**/setup.cfg') }}-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install Dependencies + run: pip install -r requirements-dev.txt + + - name: Run Tests With Coverage + run: | + # run with coverage to not execute tests twice + coverage run -m pytest -v -p no:warnings --junitxml=report.xml tests/ + coverage report + coverage xml + + - name: Test Report + uses: mikepenz/action-junit-report@v4 + continue-on-error: true + if: always() + with: + report_paths: 'report.xml' + + - name: Publish Test Report + uses: actions/upload-artifact@v3 + continue-on-error: true + if: always() + with: + name: test-report + path: report.xml + retention-days: 10 + + # simpler version for code coverage reporting + # - name: Produce Coverage report + # uses: 5monkeys/cobertura-action@v13 + # continue-on-error: true + # with: + # path: coverage.xml + # minimum_coverage: 70 + # fail_below_threshold: false + + # more complex version for better coverage reporting + - name: Produce the coverage report + uses: insightsengineering/coverage-action@v2 + continue-on-error: true + with: + # Path to the Cobertura XML report. + path: coverage.xml + # Minimum total coverage, if you want to the + # workflow to enforce it as a standard. + # This has no effect if the `fail` arg is set to `false`. + threshold: 60 + # Fail the workflow if the minimum code coverage + # reuqirements are not satisfied. + fail: false + # Publish the rendered output as a PR comment + publish: true + # Create a coverage diff report. + diff: true + # Branch to diff against. + # Compare the current coverage to the coverage + # determined on this branch. + diff-branch: ${{ github.event.repository.default_branch }} + # make report togglable + togglable-report: true + # This is where the coverage reports for the + # `diff-branch` are stored. + # Branch is created if it doesn't already exist'. + diff-storage: _xml_coverage_reports + # A custom title that can be added to the code + # coverage summary in the PR comment. + coverage-summary-title: "Code Coverage Summary" diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..298ce311 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,36 @@ +name: Build documentation + +on: + push: + branches: [main, master] + +jobs: + pages: + runs-on: ubuntu-latest + container: python:3.11 + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - name: Cache Dependencies + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements-dev.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + # for best results, it is better to generate + # documentation within development environment + - name: Create venv + run: . ./setup_dev_env.sh + + - name: Build docs + run: ./build_docs.sh + + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./public diff --git a/.gitlab/ci/terraform_apply.sh b/.gitlab/ci/terraform_apply.sh deleted file mode 100755 index 3057d6c6..00000000 --- a/.gitlab/ci/terraform_apply.sh +++ /dev/null @@ -1,18 +0,0 @@ -echo $GCP_KEY | base64 -d >> gcp_creds.json -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcp_creds.json - -# Install terraform -apt-get install wget -wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor > /usr/share/keyrings/hashicorp-archive-keyring.gpg -echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" > /etc/apt/sources.list.d/hashicorp.list -apt update -apt-get install terraform - -gcloud auth activate-service-account --key-file gcp_creds.json -gcloud config set project ds-internal-db-ally - -cd infra/terraform -terraform init -terraform apply \ - -var "project=$(gcloud config get project)" \ - -auto-approve \ No newline at end of file diff --git a/LICENSE b/LICENSE deleted file mode 100644 index a983c936..00000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2024 deepsense.ai - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/data/schemas/superhero.sql b/data/schemas/superhero.sql deleted file mode 100644 index 676da684..00000000 --- a/data/schemas/superhero.sql +++ /dev/null @@ -1,80 +0,0 @@ -CREATE TABLE alignment -( - id INTEGER not null - primary key, - alignment TEXT default NULL -); -CREATE TABLE attribute -( - id INTEGER not null - primary key, - attribute_name TEXT default NULL -); -CREATE TABLE colour -( - id INTEGER not null - primary key, - colour TEXT default NULL -); -CREATE TABLE gender -( - id INTEGER not null - primary key, - gender TEXT default NULL -); -CREATE TABLE publisher -( - id INTEGER not null - primary key, - publisher_name TEXT default NULL -); -CREATE TABLE race -( - id INTEGER not null - primary key, - race TEXT default NULL -); -CREATE TABLE superhero -( - id INTEGER not null - primary key, - superhero_name TEXT default NULL, - full_name TEXT default NULL, - gender_id INTEGER default NULL, - eye_colour_id INTEGER default NULL, - hair_colour_id INTEGER default NULL, - skin_colour_id INTEGER default NULL, - race_id INTEGER default NULL, - publisher_id INTEGER default NULL, - alignment_id INTEGER default NULL, - height_cm INTEGER default NULL, - weight_kg INTEGER default NULL, - foreign key (alignment_id) references alignment(id), - foreign key (eye_colour_id) references colour(id), - foreign key (gender_id) references gender(id), - foreign key (hair_colour_id) references colour(id), - foreign key (publisher_id) references publisher(id), - foreign key (race_id) references race(id), - foreign key (skin_colour_id) references colour(id) -); -CREATE TABLE hero_attribute -( - hero_id INTEGER default NULL, - attribute_id INTEGER default NULL, - attribute_value INTEGER default NULL, - foreign key (attribute_id) references attribute(id), - foreign key (hero_id) references superhero(id) -); -CREATE TABLE superpower -( - id INTEGER not null - primary key, - power_name TEXT default NULL -); -CREATE TABLE hero_power -( - hero_id INTEGER default NULL, - power_id INTEGER default NULL, - foreign key (hero_id) references superhero(id), - foreign key (power_id) references superpower(id) -); diff --git a/data/superhero.json b/data/superhero.json deleted file mode 100644 index 9b53dd74..00000000 --- a/data/superhero.json +++ /dev/null @@ -1,978 +0,0 @@ -[ - { - "question_id": 717, - "db_id": "superhero", - "question": "Please list all the superpowers of 3-D Man.", - "evidence": "3-D Man refers to superhero_name = '3-D Man'; superpowers refers to power_name", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = '3-D Man'", - "difficulty": "simple" - }, - { - "question_id": 718, - "db_id": "superhero", - "question": "How many superheroes have the super power of \"Super Strength\"?", - "evidence": "super power of \"Super Strength\" refers to power_name = 'Super Strength'", - "SQL": "SELECT COUNT(T1.hero_id) FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T2.power_name = 'Super Strength'", - "difficulty": "simple" - }, - { - "question_id": 719, - "db_id": "superhero", - "question": "Among the superheroes with the super power of \"Super Strength\", how many of them have a height of over 200cm?", - "evidence": "super power of \"Super Strength\" refers to power_name = 'Super Strength'; a height of over 200cm refers to height_cm > 200", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Super Strength' AND T1.height_cm > 200", - "difficulty": "moderate" - }, - { - "question_id": 720, - "db_id": "superhero", - "question": "Please list the full names of all the superheroes with over 15 super powers.", - "evidence": "15 super powers refers to COUNT(full_name) > 15", - "SQL": "SELECT DISTINCT T1.full_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id GROUP BY T1.full_name HAVING COUNT(T2.power_id) > 15", - "difficulty": "simple" - }, - { - "question_id": 726, - "db_id": "superhero", - "question": "Please give the full name of the tallest hero published by Marvel Comics.", - "evidence": "the tallest hero refers to MAX(height_cm); published by Marvel Comics refers to publisher_name = 'Marvel Comics'", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.publisher_name = 'Marvel Comics' ORDER BY T1.height_cm DESC LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 727, - "db_id": "superhero", - "question": "Who is the publisher of Sauron?", - "evidence": "the publisher refers to publisher_name; Sauron refers to superhero_name = 'Sauron'", - "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.superhero_name = 'Sauron'", - "difficulty": "simple" - }, - { - "question_id": 729, - "db_id": "superhero", - "question": "What is the average height of the superheroes from Marvel Comics?", - "evidence": "superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; average height of the superheroes refers to AVG(height_cm)", - "SQL": "SELECT AVG(T1.height_cm) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.publisher_name = 'Marvel Comics'", - "difficulty": "simple" - }, - { - "question_id": 730, - "db_id": "superhero", - "question": "Among the superheroes from Marvel Comics, what is the percentage of those who have the super power of \"Super Strength\"?", - "evidence": "the superheroes from Marvel Comics refers to publisher_name = 'Marvel Comics'; super power of \"Super Strength\" refers to power_name = 'Super Strength'; Calculation = MULTIPLY(DIVIDE(SUM(power_name = 'Super Strength'), COUNT(id)), 100)", - "SQL": "SELECT CAST(COUNT(CASE WHEN T3.power_name = 'Super Strength' THEN T1.id ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id INNER JOIN publisher AS T4 ON T1.publisher_id = T4.id WHERE T4.publisher_name = 'Marvel Comics'", - "difficulty": "challenging" - }, - { - "question_id": 731, - "db_id": "superhero", - "question": "How many superheroes did DC Comics publish?", - "evidence": "superheroes that DC Comics published refers to publisher_name = 'DC Comics'", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.publisher_name = 'DC Comics'", - "difficulty": "simple" - }, - { - "question_id": 732, - "db_id": "superhero", - "question": "Which publisher published the slowest superhero?", - "evidence": "the slowest superhero refers to attribute_name = 'Speed' where MIN(attribute_value); publisher refers to publisher_name", - "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN hero_attribute AS T3 ON T1.id = T3.hero_id INNER JOIN attribute AS T4 ON T3.attribute_id = T4.id WHERE T4.attribute_name = 'Speed' ORDER BY T3.attribute_value LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 733, - "db_id": "superhero", - "question": "How many gold-eyed superheroes did Marvel Comics publish?", - "evidence": "gold-eyed refers to colour = 'Gold' where eye_colour_id = colour.id; superheroes that Marvel Comics published refers to publisher_name = 'Marvel Comics'", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN colour AS T3 ON T1.eye_colour_id = T3.id WHERE T2.publisher_name = 'Marvel Comics' AND T3.colour = 'Gold'", - "difficulty": "moderate" - }, - { - "question_id": 734, - "db_id": "superhero", - "question": "What is the publisher's name of Blue Beetle II?", - "evidence": "Blue Beetle II refers to superhero_name = 'Blue Beetle II'", - "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.superhero_name = 'Blue Beetle II'", - "difficulty": "simple" - }, - { - "question_id": 735, - "db_id": "superhero", - "question": "How many superheroes with blonde hair are there?", - "evidence": "superheroes with blonde hair refers to colour = 'Blond' where hair_colour_id = colour.id", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.hair_colour_id = T2.id WHERE T2.colour = 'Blond'", - "difficulty": "simple" - }, - { - "question_id": 736, - "db_id": "superhero", - "question": "Who is the dumbest superhero?", - "evidence": "the dumbest superhero refers to MIN(attribute_value) where attribute_name = 'Intelligence'", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Intelligence' ORDER BY T2.attribute_value LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 737, - "db_id": "superhero", - "question": "What is Copycat's race?", - "evidence": "Copycat is the superhero_name;", - "SQL": "SELECT T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.superhero_name = 'Copycat'", - "difficulty": "simple" - }, - { - "question_id": 738, - "db_id": "superhero", - "question": "How many superheroes have durability of less than 50?", - "evidence": "durability of less than 50 refers to attribute_name = 'Durability' AND attribute_value < 50", - "SQL": "SELECT COUNT(T1.hero_id) FROM hero_attribute AS T1 INNER JOIN attribute AS T2 ON T1.attribute_id = T2.id WHERE T2.attribute_name = 'Durability' AND T1.attribute_value < 50", - "difficulty": "simple" - }, - { - "question_id": 739, - "db_id": "superhero", - "question": "What are the names of the superheroes with the power of death touch?", - "evidence": "name of superheroes refers to refers to superhero_name; the power of death touch refers to power_name = 'Death Touch'", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Death Touch'", - "difficulty": "moderate" - }, - { - "question_id": 740, - "db_id": "superhero", - "question": "How many female superheroes have a strength value of 100?", - "evidence": "female refers to gender = 'Female'; strength value of 100 refers to attribute_name = 'Strength' AND attribute_value = 100", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id INNER JOIN gender AS T4 ON T1.gender_id = T4.id WHERE T3.attribute_name = 'Strength' AND T2.attribute_value = 100 AND T4.gender = 'Female'", - "difficulty": "moderate" - }, - { - "question_id": 741, - "db_id": "superhero", - "question": "What is the name of the superhero that has the most powers?", - "evidence": "name of the superhero refers to superhero_name; superhero that has the most powers refers to MAX(COUNT(superhero_name))", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id GROUP BY T1.superhero_name ORDER BY COUNT(T2.hero_id) DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 742, - "db_id": "superhero", - "question": "How many vampire superheroes are there?", - "evidence": "vampire superheroes refers to race = 'Vampire'", - "SQL": "SELECT COUNT(T1.superhero_name) FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T2.race = 'Vampire'", - "difficulty": "simple" - }, - { - "question_id": 743, - "db_id": "superhero", - "question": "What is the percentage of superheroes who act in their own self-interest or make decisions based on their own moral code? Indicate how many of the said superheroes were published by Marvel Comics.", - "evidence": "published by Marvel Comics refers to publisher_name = 'Marvel Comics'; superheroes who act in their own self-interest or make decisions based on their own moral code refers to alignment = 'Bad'; calculation = MULTIPLY(DIVIDE(SUM(alignment = 'Bad); count(id)), 100)", - "SQL": "SELECT (CAST(COUNT(*) AS REAL) * 100 / (SELECT COUNT(*) FROM superhero)), CAST(SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) AS REAL) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN alignment AS T3 ON T3.id = T1.alignment_id WHERE T3.alignment = 'Bad'", - "difficulty": "challenging" - }, - { - "question_id": 744, - "db_id": "superhero", - "question": "Between DC and Marvel Comics, which publisher has published more superheroes? Find the difference in the number of superheroes they have published.", - "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; calculation = SUBTRACT(SUM(publisher_name = 'Marvel Comics'), SUM(publisher_name = 'DC Comics'))", - "SQL": "SELECT SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.publisher_name = 'DC Comics' THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id", - "difficulty": "challenging" - }, - { - "question_id": 745, - "db_id": "superhero", - "question": "Give the publisher ID of Star Trek.", - "evidence": "Star Trek is the publisher_name;", - "SQL": "SELECT id FROM publisher WHERE publisher_name = 'Star Trek'", - "difficulty": "simple" - }, - { - "question_id": 746, - "db_id": "superhero", - "question": "Calculate the average attribute value of all superheroes.", - "evidence": "average attribute value of all superheroes refers to AVG(attribute_value)", - "SQL": "SELECT AVG(attribute_value) FROM hero_attribute", - "difficulty": "simple" - }, - { - "question_id": 747, - "db_id": "superhero", - "question": "What is the total number of superheroes without full name?", - "evidence": "superheroes without full name refers to full_name IS NULL", - "SQL": "SELECT COUNT(id) FROM superhero WHERE full_name IS NULL", - "difficulty": "simple" - }, - { - "question_id": 748, - "db_id": "superhero", - "question": "What is the eye colour of superhero with superhero ID 75?", - "evidence": "eye colour refers to colour where eye_colour_id = colour.id;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.id = 75", - "difficulty": "simple" - }, - { - "question_id": 749, - "db_id": "superhero", - "question": "Provide the superpowers of the superhero called Deathlok.", - "evidence": "superpowers refers to power_name; Deathlok refers to superhero_name = 'Deathlok'", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = 'Deathlok'", - "difficulty": "simple" - }, - { - "question_id": 750, - "db_id": "superhero", - "question": "What is the average weight of all female superheroes?", - "evidence": "female refers to gender_id = 2; average weight refers to AVG(weight_kg)", - "SQL": "SELECT AVG(T1.weight_kg) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id WHERE T2.gender = 'Female'", - "difficulty": "simple" - }, - { - "question_id": 751, - "db_id": "superhero", - "question": "List down at least five superpowers of male superheroes.", - "evidence": "male refers to gender = 'Male'; superpowers refers to power_name;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T3.id = T2.power_id INNER JOIN gender AS T4 ON T4.id = T1.gender_id WHERE T4.gender = 'Male' LIMIT 5", - "difficulty": "moderate" - }, - { - "question_id": 752, - "db_id": "superhero", - "question": "Give the name of the alien superheroes.", - "evidence": "alien superheroes refers to race = 'Alien'; name of superhero refers to superhero_name;", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T2.race = 'Alien'", - "difficulty": "simple" - }, - { - "question_id": 753, - "db_id": "superhero", - "question": "Among the superheroes with height from 170 to 190, list the names of the superheroes with no eye color.", - "evidence": "height from 170 to 190 refers to height_cm BETWEEN 170 AND 190; no eye color refers to eye_colour_id = 1", - "SQL": "SELECT DISTINCT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.height_cm BETWEEN 170 AND 190 AND T2.colour LIKE 'No Colour'", - "difficulty": "moderate" - }, - { - "question_id": 754, - "db_id": "superhero", - "question": "What is the superpower of hero ID 56?", - "evidence": "superpower refers to hero_power", - "SQL": "SELECT T2.power_name FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T1.hero_id = 56", - "difficulty": "simple" - }, - { - "question_id": 755, - "db_id": "superhero", - "question": "List down at least five full name of Demi-God superheroes.", - "evidence": "Demi-God superheroes refers to race = 'Demi-God'", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T2.race = 'Demi-God'", - "difficulty": "simple" - }, - { - "question_id": 756, - "db_id": "superhero", - "question": "How many bad superheroes are there?", - "evidence": "bad superheroes refers to alignment_id = Bad", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id WHERE T2.alignment = 'Bad'", - "difficulty": "simple" - }, - { - "question_id": 757, - "db_id": "superhero", - "question": "Identify the race of the superhero who weighed 169 kg.", - "evidence": "weighed 169 kg refers to weight_kg = 169", - "SQL": "SELECT T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.weight_kg = 169", - "difficulty": "simple" - }, - { - "question_id": 758, - "db_id": "superhero", - "question": "Provide the hair colour of the human superhero who is 185 cm tall.", - "evidence": "185 cm tall refers to height_cm = 185; human superhero refers to race = 'human'; hair colour refers to colour where hair_colour_id = colour.id;", - "SQL": "SELECT DISTINCT T3.colour FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id INNER JOIN colour AS T3 ON T1.hair_colour_id = T3.id WHERE T1.height_cm = 185 AND T2.race = 'Human'", - "difficulty": "moderate" - }, - { - "question_id": 759, - "db_id": "superhero", - "question": "What is the eye clolour of the heaviest superhero?", - "evidence": "the heaviest superhero refers to MAX(weight_kg); eye colour refers to colour where eye_colour_id = colour.id;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id ORDER BY T1.weight_kg DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 760, - "db_id": "superhero", - "question": "In superheroes with height between 150 to 180, what is the percentage of heroes published by Marvel Comics?", - "evidence": "height between 150 to 180 refers to height_cm BETWEEN 150 AND 180; heroes published by Marvel Comics refers to publisher_id = 13; calculation = MULTIPLY(DIVIDE(SUM(publisher.id = 13)), COUNT(publisher.id), 100)", - "SQL": "SELECT CAST(COUNT(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.height_cm BETWEEN 150 AND 180", - "difficulty": "challenging" - }, - { - "question_id": 761, - "db_id": "superhero", - "question": "Among the male superheroes, list the full names of superheroes with weight greater than the 79% average weight of all superheroes.", - "evidence": "Calculation = weight_kg > MULTIPLY(AVG(weight_kg), 0.79)", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id WHERE T2.gender = 'Male' AND T1.weight_kg * 100 > ( SELECT AVG(weight_kg) FROM superhero ) * 79", - "difficulty": "moderate" - }, - { - "question_id": 762, - "db_id": "superhero", - "question": "Which power do superheroes have the most of?", - "evidence": "power that superheroes have the most refers to MAX(COUNT(power_name))", - "SQL": "SELECT T2.power_name FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id GROUP BY T2.power_name ORDER BY COUNT(T1.hero_id) DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 763, - "db_id": "superhero", - "question": "Indicate the attribute value of superhero Abomination.", - "evidence": "Abomination refers to superhero_name = 'Abomination';", - "SQL": "SELECT T2.attribute_value FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id WHERE T1.superhero_name = 'Abomination'", - "difficulty": "simple" - }, - { - "question_id": 764, - "db_id": "superhero", - "question": "What are the superpowers of heroes with ID 1?", - "evidence": "superpowers refers to power_name; heroes with ID 1 refers to hero_id = 1;", - "SQL": "SELECT DISTINCT T2.power_name FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T1.hero_id = 1", - "difficulty": "simple" - }, - { - "question_id": 765, - "db_id": "superhero", - "question": "How many heroes have stealth power?", - "evidence": "stealth power refers to power_name = 'stealth';", - "SQL": "SELECT COUNT(T1.hero_id) FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T2.power_name = 'Stealth'", - "difficulty": "simple" - }, - { - "question_id": 766, - "db_id": "superhero", - "question": "What is the hero's full name with the highest attribute in strength?", - "evidence": "highest attribute in strength refers to MAX(attribute_value) WHERE attribute_name = 'strength';", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Strength' ORDER BY T2.attribute_value DESC LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 767, - "db_id": "superhero", - "question": "What is the average of superheroes with no skin colour?", - "evidence": "average = DIVIDE(COUNT(superhero.id), SUM(skin_colour_id = 1)); no skin colour refers to skin_colour_id WHERE colour.id = 1;", - "SQL": "SELECT CAST(COUNT(*) AS REAL) / SUM(CASE WHEN T2.id = 1 THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.skin_colour_id = T2.id", - "difficulty": "simple" - }, - { - "question_id": 768, - "db_id": "superhero", - "question": "How many superheroes were published by Dark Horse Comics?", - "evidence": "published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.publisher_name = 'Dark Horse Comics'", - "difficulty": "simple" - }, - { - "question_id": 769, - "db_id": "superhero", - "question": "Which superhero has the most durability published by Dark Horse Comics?", - "evidence": "which superhero refers to superhero_name; most durability refers to MAX(attribute_value) WHERE attribute_name = 'durability'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T3.id = T2.attribute_id INNER JOIN publisher AS T4 ON T4.id = T1.publisher_id WHERE T4.publisher_name = 'Dark Horse Comics' AND T3.attribute_name = 'Durability' ORDER BY T2.attribute_value DESC LIMIT 1", - "difficulty": "challenging" - }, - { - "question_id": 770, - "db_id": "superhero", - "question": "What is the eyes colour of Abraham Sapien?", - "evidence": "eye colour refers to colour.colour where eye_colour_id = colour.id; Abraham Sapien is the full name of superhero;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.full_name = 'Abraham Sapien'", - "difficulty": "simple" - }, - { - "question_id": 771, - "db_id": "superhero", - "question": "List the name of superheroes with flight power.", - "evidence": "name of superheroes refers to superhero_name; flight power refers to power_name = 'Flight';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Flight'", - "difficulty": "simple" - }, - { - "question_id": 772, - "db_id": "superhero", - "question": "List the eyes, hair and skin colour of all female superheroes published by Dark Horse Comics.", - "evidence": "eyes refers to eye_colour_id; hair refers to hair_colour_id; skin colour refers to skin_colour_id; female superheroes refers to gender = 'Female'; published by Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';", - "SQL": "SELECT T1.eye_colour_id, T1.hair_colour_id, T1.skin_colour_id FROM superhero AS T1 INNER JOIN publisher AS T2 ON T2.id = T1.publisher_id INNER JOIN gender AS T3 ON T3.id = T1.gender_id WHERE T2.publisher_name = 'Dark Horse Comics' AND T3.gender = 'Female'", - "difficulty": "challenging" - }, - { - "question_id": 773, - "db_id": "superhero", - "question": "Which superhero has the same eyes, hair and skin colour? Indicate the publisher of the superhero.", - "evidence": "which superhero refers to superhero_name; the same eyes, hair and skin colour refers to hair_colour_id = skin_colour_id AND hair_colour_id = eye_colour_id; publisher refers to publisher_name;", - "SQL": "SELECT T1.superhero_name, T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.eye_colour_id = T1.hair_colour_id AND T1.eye_colour_id = T1.skin_colour_id", - "difficulty": "challenging" - }, - { - "question_id": 774, - "db_id": "superhero", - "question": "Which group does superhero A-Bomb belong to?", - "evidence": "group refers to race; A-Bomb refers to superhero_name = 'A-Bomb';", - "SQL": "SELECT T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.superhero_name = 'A-Bomb'", - "difficulty": "simple" - }, - { - "question_id": 775, - "db_id": "superhero", - "question": "What is the percentage of blue female superheroes among all female superheroes?", - "evidence": "percentage = MULTIPLY(DIVIDE(SUM(colour = 'Blue' WHERE gender = 'Female'), COUNT(gender = 'Female')), 100); blue refers to the color; female refers to gender = 'Female';", - "SQL": "SELECT CAST(COUNT(CASE WHEN T3.colour = 'Blue' THEN T1.id ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id INNER JOIN colour AS T3 ON T1.skin_colour_id = T3.id WHERE T2.gender = 'Female'", - "difficulty": "challenging" - }, - { - "question_id": 776, - "db_id": "superhero", - "question": "Provide the hero name and race of Charles Chandler.", - "evidence": "hero name refers to superhero_name; Charles Chandler is the full name of superhero;", - "SQL": "SELECT T1.superhero_name, T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.full_name = 'Charles Chandler'", - "difficulty": "simple" - }, - { - "question_id": 777, - "db_id": "superhero", - "question": "What is the gender of Agent 13 hero?", - "evidence": "Agent 13 hero refers to superhero_name = 'Agent 13';", - "SQL": "SELECT T2.gender FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id WHERE T1.superhero_name = 'Agent 13'", - "difficulty": "simple" - }, - { - "question_id": 778, - "db_id": "superhero", - "question": "Provide superheroes' names who have the adaptation power.", - "evidence": "adaptation power refers to power_name = 'Adaptation';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Adaptation'", - "difficulty": "simple" - }, - { - "question_id": 779, - "db_id": "superhero", - "question": "How many powers does Amazo hero have?", - "evidence": "Amazo hero refers to superhero_name = 'Amazo';", - "SQL": "SELECT COUNT(T1.power_id) FROM hero_power AS T1 INNER JOIN superhero AS T2 ON T1.hero_id = T2.id WHERE T2.superhero_name = 'Amazo'", - "difficulty": "simple" - }, - { - "question_id": 780, - "db_id": "superhero", - "question": "List the powers of Hunter Zolomon.", - "evidence": "Hunter Zolomon is the full name of superhero; list the powers refers to power_name;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.full_name = 'Hunter Zolomon'", - "difficulty": "simple" - }, - { - "question_id": 781, - "db_id": "superhero", - "question": "Provide the heights of the heroes whose eye colours are amber.", - "evidence": "heights of the heroes refers to height_cm; eye colours are amber refers to colour.colour = 'Amber' WHERE eye_colour_id = colour.id;", - "SQL": "SELECT T1.height_cm FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T2.colour = 'Amber'", - "difficulty": "simple" - }, - { - "question_id": 782, - "db_id": "superhero", - "question": "List the heroes' names whose eyes and hair colours are both black.", - "evidence": "heroes' names refers to superhero_name; eyes and hair colours are both black refers to eye_colour_id AND hair_colour_id WHERE colour.colour = 'Black';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id AND T1.hair_colour_id = T2.id WHERE T2.colour = 'Black'", - "difficulty": "moderate" - }, - { - "question_id": 783, - "db_id": "superhero", - "question": "Provide the eye colours of the heroes whose skin colours are gold.", - "evidence": "skin colours are gold refers to colour.colour = 'Gold' WHERE skin_colour_id = colour.id;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id INNER JOIN colour AS T3 ON T1.skin_colour_id = T3.id WHERE T3.colour = 'Gold'", - "difficulty": "simple" - }, - { - "question_id": 784, - "db_id": "superhero", - "question": "Provide the full names of vampire heroes.", - "evidence": "vampire heroes refers to race = 'Vampire';", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T2.race = 'Vampire'", - "difficulty": "simple" - }, - { - "question_id": 785, - "db_id": "superhero", - "question": "Describe the names of neutral alignment superheroes.", - "evidence": "names of superheroes refers to superhero_name; neutral alignment refers to alignment = 'Neutral';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id WHERE T2.alignment = 'Neutral'", - "difficulty": "simple" - }, - { - "question_id": 786, - "db_id": "superhero", - "question": "How many heroes have the highest attribute value in strength?", - "evidence": "highest attribute value in strength refers to MAX(attribute_value) WHERE attribute_name = 'Strength';", - "SQL": "SELECT COUNT(T1.hero_id) FROM hero_attribute AS T1 INNER JOIN attribute AS T2 ON T1.attribute_id = T2.id WHERE T2.attribute_name = 'Strength' AND T1.attribute_value = ( SELECT MAX(attribute_value) FROM hero_attribute )", - "difficulty": "moderate" - }, - { - "question_id": 787, - "db_id": "superhero", - "question": "What are the race and alignment of Cameron Hicks?", - "evidence": "Cameron Hicks refers to superhero_name = 'Cameron Hicks';", - "SQL": "SELECT T2.race, T3.alignment FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id INNER JOIN alignment AS T3 ON T1.alignment_id = T3.id WHERE T1.superhero_name = 'Cameron Hicks'", - "difficulty": "simple" - }, - { - "question_id": 788, - "db_id": "superhero", - "question": "How many percent of female heroes were published by Marvel Comics?", - "evidence": "percent = MULTIPLY(DIVIDE(SUM(gender = 'Female' WHERE publisher_name = 'Marvel Comics'), COUNT(publisher_name = 'Marvel Comics')), 100); female heroes refers to gender = 'Female'; Marvel Comics refers to publisher_name = 'Marvel Comics';", - "SQL": "SELECT CAST(COUNT(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id WHERE T3.gender = 'Female'", - "difficulty": "challenging" - }, - { - "question_id": 789, - "db_id": "superhero", - "question": "Find the average weight of the heroes who are aliens.", - "evidence": "average = AVG(weight_kg); aliens refers to race = 'Alien';", - "SQL": "SELECT CAST(SUM(T1.weight_kg) AS REAL) / COUNT(T1.id) FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T2.race = 'Alien'", - "difficulty": "simple" - }, - { - "question_id": 790, - "db_id": "superhero", - "question": "Calculate the difference between Emil Blonsky's weight and Charles Chandler's weight.", - "evidence": "difference = SUBTRACT(SUM(weight_kg WHERE full_name = 'Emil Blonsky'), SUM(weight_kg WHERE full_name = 'Charles Chandler')); Emil Blonsky is the full name of superhero; Charles Chandler is the full name of superhero;", - "SQL": "SELECT ( SELECT weight_kg FROM superhero WHERE full_name LIKE 'Emil Blonsky' ) - ( SELECT weight_kg FROM superhero WHERE full_name LIKE 'Charles Chandler' ) AS CALCULATE", - "difficulty": "moderate" - }, - { - "question_id": 791, - "db_id": "superhero", - "question": "Calculate the average height for each superhero.", - "evidence": "average = DIVIDE(SUM(height_cm), COUNT(all heros));", - "SQL": "SELECT CAST(SUM(height_cm) AS REAL) / COUNT(id) FROM superhero", - "difficulty": "simple" - }, - { - "question_id": 792, - "db_id": "superhero", - "question": "What is Abomination's superpower?", - "evidence": "Abomination refers to superhero_name = 'Abomination'; superpower refers to power_name;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = 'Abomination'", - "difficulty": "simple" - }, - { - "question_id": 793, - "db_id": "superhero", - "question": "Among the superheroes with the race of god/eternal, how many of them are male", - "evidence": "race \"god/eternal\" refers to race_id = 21; male refers to gender.id = 1", - "SQL": "SELECT COUNT(*) FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id INNER JOIN gender AS T3 ON T3.id = T1.gender_id WHERE T1.race_id = 21 AND T1.gender_id = 1", - "difficulty": "simple" - }, - { - "question_id": 794, - "db_id": "superhero", - "question": "Which hero was the fastest?", - "evidence": "which hero refers to superhero_name; fastest refers to MAX(attribute_value) WHERE attribute_name = 'Speed';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T3.attribute_name = 'Speed' ORDER BY T2.attribute_value DESC LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 795, - "db_id": "superhero", - "question": "How many superheroes have a neutral alignment?", - "evidence": "neutral alignment refers to alignment_id = 3;", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id WHERE T2.alignment = 'Neutral'", - "difficulty": "simple" - }, - { - "question_id": 796, - "db_id": "superhero", - "question": "State all of 3-D Man's attributes along with their values.", - "evidence": "3-D Man is the superhero_name. attributes refers to attribute_name; values refers to attribute_value;", - "SQL": "SELECT T3.attribute_name, T2.attribute_value FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T1.superhero_name = '3-D Man'", - "difficulty": "moderate" - }, - { - "question_id": 797, - "db_id": "superhero", - "question": "Which superheroes have blue eyes with brown hair?", - "evidence": "which superheroes refers to superhero_name; blue eyes refers to eye_colour_id = 7; brown hair refers to hair_colour_id = 9;", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id INNER JOIN colour AS T3 ON T1.hair_colour_id = T3.id WHERE T2.colour = 'Blue' AND T3.colour = 'Brown'", - "difficulty": "moderate" - }, - { - "question_id": 798, - "db_id": "superhero", - "question": "What is the publisher for Hawkman, Karate Kid and Speedy?", - "evidence": "publisher refers to publisher_name; Hawkman refers to superhero_name = 'Hawkman'; Karate Kid refers to superhero_name = 'Karate Kid'; Speedy refers to superhero_name = 'Speedy';", - "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.superhero_name IN ('Hawkman', 'Karate Kid', 'Speedy')", - "difficulty": "moderate" - }, - { - "question_id": 799, - "db_id": "superhero", - "question": "How many superheroes didn't have any publisher?", - "evidence": "didn't have any publisher refers to publisher.id = 1;", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.id = 1", - "difficulty": "simple" - }, - { - "question_id": 800, - "db_id": "superhero", - "question": "Calculate the percentage of superheroes with blue eyes.", - "evidence": "percentage = MULTIPLY(DIVIDE(SUM(superhero_name WHERE eye_colour_id = 7), COUNT(superhero_name)), 100.0); blue eyes refers to eye_colour_id = 7;", - "SQL": "SELECT CAST(COUNT(CASE WHEN T2.colour = 'Blue' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id", - "difficulty": "moderate" - }, - { - "question_id": 801, - "db_id": "superhero", - "question": "Find the ratio between male superheroes and female superheroes.", - "evidence": "ratio = DIVIDE(SUM(gender_id = 1) / SUM(gender_id = 2)); male superheroes refers to gender_id = 1; female superheroes refers to gender_id = 2;", - "SQL": "SELECT CAST(COUNT(CASE WHEN T2.gender = 'Male' THEN T1.id ELSE NULL END) AS REAL) / COUNT(CASE WHEN T2.gender = 'Female' THEN T1.id ELSE NULL END) FROM superhero AS T1 INNER JOIN gender AS T2 ON T1.gender_id = T2.id", - "difficulty": "moderate" - }, - { - "question_id": 802, - "db_id": "superhero", - "question": "Who is the tallest superhero?", - "evidence": "who refers to superhero_name; tallest superhero refers to MAX(height_cm);", - "SQL": "SELECT superhero_name FROM superhero ORDER BY height_cm DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 803, - "db_id": "superhero", - "question": "What is the power ID of cryokinesis?", - "evidence": "power ID refers to superpower.id; cryokinesis refers to power_name = 'cryokinesis';", - "SQL": "SELECT id FROM superpower WHERE power_name = 'Cryokinesis'", - "difficulty": "simple" - }, - { - "question_id": 804, - "db_id": "superhero", - "question": "Provide the name of superhero with superhero ID 294.", - "evidence": "name of superhero refers to superhero_name; superhero ID 294 refers to superhero.id = 294;", - "SQL": "SELECT superhero_name FROM superhero WHERE id = 294", - "difficulty": "simple" - }, - { - "question_id": 805, - "db_id": "superhero", - "question": "List the full names of superheroes with missing weight.", - "evidence": "missing weight refers to weight_kg = 0 OR weight_kg = NULL;", - "SQL": "SELECT DISTINCT full_name FROM superhero WHERE full_name IS NOT NULL AND (weight_kg IS NULL OR weight_kg = 0)", - "difficulty": "simple" - }, - { - "question_id": 806, - "db_id": "superhero", - "question": "Provide the eye colour of the superhero who has Karen Beecher-Duncan as their full name.", - "evidence": "eye colour refers to colour.colour where eye_colour_id = colour.id; Karen Beecher-Duncan is the full name of superhero;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.full_name = 'Karen Beecher-Duncan'", - "difficulty": "simple" - }, - { - "question_id": 807, - "db_id": "superhero", - "question": "What is the superpowers of the superhero has Helen Parr as their full name?", - "evidence": "superpowers refers to power_name; Helen Parr is the full name of superhero;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.full_name = 'Helen Parr'", - "difficulty": "simple" - }, - { - "question_id": 808, - "db_id": "superhero", - "question": "Find the race of the superhero who weighs 108kg and is 188cm tall.", - "evidence": "weighs 108kg refers to weight_kg = 108; 188cm tall refers to height_cm = 188;", - "SQL": "SELECT DISTINCT T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.weight_kg = 108 AND T1.height_cm = 188", - "difficulty": "simple" - }, - { - "question_id": 809, - "db_id": "superhero", - "question": "What is the publisher name of the superhero ID 38?", - "evidence": "superhero ID 38 refers to superhero.id = 38;", - "SQL": "SELECT T2.publisher_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T1.id = 38", - "difficulty": "simple" - }, - { - "question_id": 810, - "db_id": "superhero", - "question": "What is the race of the superhero with maximum attribute value?", - "evidence": "maximum attribute value refers to MAX(attribute_value);", - "SQL": "SELECT T3.race FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN race AS T3 ON T1.race_id = T3.id ORDER BY T2.attribute_value DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 811, - "db_id": "superhero", - "question": "Give the alignment and superpowers of the superhero named Atom IV.", - "evidence": "superpowers refers to power_name;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T3.id = T2.power_id WHERE T1.superhero_name = 'Atom IV'", - "difficulty": "simple" - }, - { - "question_id": 812, - "db_id": "superhero", - "question": "List down at least five full names of superheroes with blue eyes.", - "evidence": "blue eyes refers to colour.colour = 'Blue' WHERE eye_colour_id = colour.id;", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T2.colour = 'Blue' LIMIT 5", - "difficulty": "simple" - }, - { - "question_id": 813, - "db_id": "superhero", - "question": "Calculate the average attribute value of all neutral superheroes.", - "evidence": "average = AVG(attribute_value); neutral superheroes refers to alignment_id = 3;", - "SQL": "SELECT AVG(T1.attribute_value) FROM hero_attribute AS T1 INNER JOIN superhero AS T2 ON T1.hero_id = T2.id INNER JOIN alignment AS T3 ON T2.alignment_id = T3.id WHERE T3.alignment = 'Neutral'", - "difficulty": "simple" - }, - { - "question_id": 814, - "db_id": "superhero", - "question": "List the skin colour of the superheroes with 100 attribute value.", - "evidence": "skin colour refers to colour.colour where skin_colour_id = colour.id; 100 attribute value refers to attribute_value = 100;", - "SQL": "SELECT DISTINCT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.skin_colour_id = T2.id INNER JOIN hero_attribute AS T3 ON T1.id = T3.hero_id WHERE T3.attribute_value = 100", - "difficulty": "moderate" - }, - { - "question_id": 815, - "db_id": "superhero", - "question": "Count the good female superheroes.", - "evidence": "good refers to alignment.id = 1; female refers to gender.id = 2;", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id WHERE T2.alignment = 'Good' AND T3.gender = 'Female'", - "difficulty": "simple" - }, - { - "question_id": 816, - "db_id": "superhero", - "question": "Provide the names of superheroes with attribute value between 75 to 80.", - "evidence": "names of superheroes refers to superhero_name; attribute value between 75 to 80 refers to attribute_value BETWEEN 75 AND 80;", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id WHERE T2.attribute_value BETWEEN 75 AND 80", - "difficulty": "simple" - }, - { - "question_id": 817, - "db_id": "superhero", - "question": "Give the race of the blue-haired male superhero.", - "evidence": "blue-haired refers to colour.colour = 'blue' WHERE hair_colour_id = colour.id; male refers to gender = 'male';", - "SQL": "SELECT T3.race FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.hair_colour_id = T2.id INNER JOIN race AS T3 ON T1.race_id = T3.id INNER JOIN gender AS T4 ON T1.gender_id = T4.id WHERE T2.colour = 'Blue' AND T4.gender = 'Male'", - "difficulty": "moderate" - }, - { - "question_id": 818, - "db_id": "superhero", - "question": "Among the bad superheroes, what is the percentage of female superheroes?", - "evidence": "bad superheroes refers to alignment.id = 2; percentage = MULTIPLY(DIVIDE(SUM(gender.id = 2 WHERE alignment.id = 2), COUNT(alignment.id = 2)), 100.0); female refers to gender.id = 2;", - "SQL": "SELECT CAST(COUNT(CASE WHEN T3.gender = 'Female' THEN T1.id ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id WHERE T2.alignment = 'Bad'", - "difficulty": "challenging" - }, - { - "question_id": 819, - "db_id": "superhero", - "question": "In superheroes with missing weight data, calculate the difference between the number of superheroes with blue eyes and no eye color.", - "evidence": "missing weight data refers to weight_kg = 0 OR T1.weight_kg = NULL; difference = SUBTRACT(SUM(colour.id = 7), SUM(colour.id = 1)); blue eyes refers to eye_colour_id WHERE colour.id = 7; no eye color refers to eye_colour_id WHERE colour.id = 1;", - "SQL": "SELECT SUM(CASE WHEN T2.id = 7 THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.id = 1 THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.weight_kg = 0 OR T1.weight_kg is NULL", - "difficulty": "challenging" - }, - { - "question_id": 820, - "db_id": "superhero", - "question": "How strong is the Hulk?", - "evidence": "how strong refers to attribute_value WHERE attribute_name = 'Strength'; the Hulk refers to superhero_name = 'Hulk';", - "SQL": "SELECT T2.attribute_value FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T1.superhero_name = 'Hulk' AND T3.attribute_name = 'Strength'", - "difficulty": "moderate" - }, - { - "question_id": 821, - "db_id": "superhero", - "question": "List down Ajax's superpowers.", - "evidence": "Ajax refers to superhero_name = 'Ajax'; superpowers refers to power_name;", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.superhero_name = 'Ajax'", - "difficulty": "simple" - }, - { - "question_id": 822, - "db_id": "superhero", - "question": "How many green-skinned villains are there in the superhero universe?", - "evidence": "green-skinned refers to colour.colour = 'Green' WHERE skin_colour_id = colour.id; villains refers to alignment = 'Bad';", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id INNER JOIN colour AS T3 ON T1.skin_colour_id = T3.id WHERE T2.alignment = 'Bad' AND T3.colour = 'Green'", - "difficulty": "moderate" - }, - { - "question_id": 823, - "db_id": "superhero", - "question": "How many female superheroes are in Marvel Comics?", - "evidence": "female refers to gender = 'Female'; Marvel Comics refers to publisher_name = 'Marvel Comics';", - "SQL": "SELECT COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id WHERE T2.publisher_name = 'Marvel Comics' AND T3.gender = 'Female'", - "difficulty": "moderate" - }, - { - "question_id": 824, - "db_id": "superhero", - "question": "Identify superheroes who can control wind and list their names in alphabetical order.", - "evidence": "superheroes refers to superhero_name; can control wind refers to power_name = 'Wind Control';", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T3.power_name = 'Wind Control' ORDER BY T1.superhero_name", - "difficulty": "moderate" - }, - { - "question_id": 825, - "db_id": "superhero", - "question": "Identify the gender of the superhero who has the ability of Phoenix Force.", - "evidence": "ability of Phoenix Force refers to power_name = 'Phoenix Force';", - "SQL": "SELECT T4.gender FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id INNER JOIN gender AS T4 ON T1.gender_id = T4.id WHERE T3.power_name = 'Phoenix Force'", - "difficulty": "moderate" - }, - { - "question_id": 826, - "db_id": "superhero", - "question": "Identify the heaviest superhero in DC Comics.", - "evidence": "heaviest refers to MAX(weight_kg); DC Comics refers to publisher_name = 'DC Comics'; superhero refers to superhero_name;", - "SQL": "SELECT T1.superhero_name FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id WHERE T2.publisher_name = 'DC Comics' ORDER BY T1.weight_kg DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 827, - "db_id": "superhero", - "question": "What is the average height of a non-human superhero in Dark Horse Comics?", - "evidence": "average = AVG(height_cm); non-human superhero refers to race <> 'Human'; Dark Horse Comics refers to publisher_name = 'Dark Horse Comics';", - "SQL": "SELECT AVG(T1.height_cm) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN race AS T3 ON T1.race_id = T3.id WHERE T2.publisher_name = 'Dark Horse Comics' AND T3.race != 'Human'", - "difficulty": "moderate" - }, - { - "question_id": 829, - "db_id": "superhero", - "question": "Which publisher created more superheroes: DC or Marvel Comics? Find the difference in the number of superheroes.", - "evidence": "DC refers to publisher_name = 'DC Comics'; Marvel Comics refers to publisher_name = 'Marvel Comics'; if SUM(publisher_name = 'DC Comics') > SUM(publisher_name = 'Marvel Comics'), it means DC Comics published more superheroes than Marvel Comics; if SUM(publisher_name = 'Marvel Comics') > SUM(publisher_name = 'Marvel Comics'), it means Marvel Comics published more heroes than DC Comics; difference = SUBTRACT(SUM(publisher_name = 'DC Comics'), SUM(publisher_name = 'Marvel Comics'));", - "SQL": "SELECT SUM(CASE WHEN T2.publisher_name = 'DC Comics' THEN 1 ELSE 0 END) - SUM(CASE WHEN T2.publisher_name = 'Marvel Comics' THEN 1 ELSE 0 END) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id", - "difficulty": "challenging" - }, - { - "question_id": 830, - "db_id": "superhero", - "question": "Identify the weakest attribute of the Black Panther.", - "evidence": "weakest attribute refers to attribute_name WHERE MIN(attribute_value); Black Panther refers to superhero_name = 'Black Panther';", - "SQL": "SELECT T3.attribute_name FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id INNER JOIN attribute AS T3 ON T2.attribute_id = T3.id WHERE T1.superhero_name = 'Black Panther' ORDER BY T2.attribute_value ASC LIMIT 1", - "difficulty": "moderate" - }, - { - "question_id": 831, - "db_id": "superhero", - "question": "What is Abomination's eye colour?", - "evidence": "Abomination refers to superhero_name = 'Abomination'; eye colour refers to colour.colour where eye_colour_id = colour.id;", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.superhero_name = 'Abomination'", - "difficulty": "simple" - }, - { - "question_id": 832, - "db_id": "superhero", - "question": "Name the tallest superhero.", - "evidence": "tallest superhero refers to MAX(height_cm);", - "SQL": "SELECT superhero_name FROM superhero ORDER BY height_cm DESC LIMIT 1", - "difficulty": "simple" - }, - { - "question_id": 833, - "db_id": "superhero", - "question": "Name the superhero, otherwise known as Charles Chandler.", - "evidence": "name the superhero refers to superhero_name; Charles Chandler is the full name of superhero;", - "SQL": "SELECT superhero_name FROM superhero WHERE full_name = 'Charles Chandler'", - "difficulty": "simple" - }, - { - "question_id": 834, - "db_id": "superhero", - "question": "Among all superheroes created by George Lucas, identify the percentage of female superheroes.", - "evidence": "created by George Lucas refers to publisher_name = 'George Lucas'; percentage = MULTIPLY(DIVIDE(SUM(gender = 'Female' WHERE publisher_name = 'George Lucas'), COUNT(publisher_name = 'George Lucas')), 100.0); female refers to gender = 'Female';", - "SQL": "SELECT CAST(COUNT(CASE WHEN T3.gender = 'Female' THEN 1 ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN gender AS T3 ON T1.gender_id = T3.id WHERE T2.publisher_name = 'George Lucas'", - "difficulty": "challenging" - }, - { - "question_id": 835, - "db_id": "superhero", - "question": "Among all superheroes in Marvel Comics, identify the percentage of 'good' superheroes.", - "evidence": "Marvel Comics refers to publisher_name = 'Marvel Comics'; percentage = MULTIPLY(DIVIDE(SUM(alignment = 'Good' WHERE publisher_name = 'Marvel Comics'), COUNT(publisher_name = 'Marvel Comics')), 100.0); good superheroes refers to alignment = 'Good';", - "SQL": "SELECT CAST(COUNT(CASE WHEN T3.alignment = 'Good' THEN T1.id ELSE NULL END) AS REAL) * 100 / COUNT(T1.id) FROM superhero AS T1 INNER JOIN publisher AS T2 ON T1.publisher_id = T2.id INNER JOIN alignment AS T3 ON T1.alignment_id = T3.id WHERE T2.publisher_name = 'Marvel Comics'", - "difficulty": "challenging" - }, - { - "question_id": 836, - "db_id": "superhero", - "question": "What is the total number of superheroes that have John as their first name?", - "evidence": "have John as their first name refers to full_name LIKE 'John%';", - "SQL": "SELECT COUNT(id) FROM superhero WHERE full_name LIKE 'John%'", - "difficulty": "simple" - }, - { - "question_id": 837, - "db_id": "superhero", - "question": "Give the hero ID of superhero with the lowest attribute value.", - "evidence": "lowest attribute value refers to MIN(attribute_value);", - "SQL": "SELECT hero_id FROM hero_attribute WHERE attribute_value = ( SELECT MIN(attribute_value) FROM hero_attribute )", - "difficulty": "simple" - }, - { - "question_id": 838, - "db_id": "superhero", - "question": "Provide the full name of the superhero named Alien.", - "evidence": "", - "SQL": "SELECT full_name FROM superhero WHERE superhero_name = 'Alien'", - "difficulty": "simple" - }, - { - "question_id": 839, - "db_id": "superhero", - "question": "In superheroes with weight less than 100, list the full name of the superheroes with brown eyes.", - "evidence": "weight less than 100 refers to weight_kg < 100", - "SQL": "SELECT T1.full_name FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.weight_kg < 100 AND T2.colour = 'Brown'", - "difficulty": "simple" - }, - { - "question_id": 840, - "db_id": "superhero", - "question": "List the attribute value of the superhero named Aquababy.", - "evidence": "", - "SQL": "SELECT T2.attribute_value FROM superhero AS T1 INNER JOIN hero_attribute AS T2 ON T1.id = T2.hero_id WHERE T1.superhero_name = 'Aquababy'", - "difficulty": "simple" - }, - { - "question_id": 841, - "db_id": "superhero", - "question": "Provide the weight and race of the superhero with superhero ID 40.", - "evidence": "weight refers to weight_kg; superhero ID 40 refers to superhero.id = 40;", - "SQL": "SELECT T1.weight_kg, T2.race FROM superhero AS T1 INNER JOIN race AS T2 ON T1.race_id = T2.id WHERE T1.id = 40", - "difficulty": "simple" - }, - { - "question_id": 842, - "db_id": "superhero", - "question": "Calculate the average height of all neutral superheroes.", - "evidence": "", - "SQL": "SELECT AVG(T1.height_cm) FROM superhero AS T1 INNER JOIN alignment AS T2 ON T1.alignment_id = T2.id WHERE T2.alignment = 'Neutral'", - "difficulty": "simple" - }, - { - "question_id": 843, - "db_id": "superhero", - "question": "List the hero ID of superheroes have intellegence as their power.", - "evidence": "hero ID refers to superhero.id; have intelligence as their power refers to power_name = 'Intelligence';", - "SQL": "SELECT T1.hero_id FROM hero_power AS T1 INNER JOIN superpower AS T2 ON T1.power_id = T2.id WHERE T2.power_name = 'Intelligence'", - "difficulty": "simple" - }, - { - "question_id": 844, - "db_id": "superhero", - "question": "Give the eye colour of Blackwulf.", - "evidence": "eye colour refers to colour.colour where eye_colour_id = colour.id; Blackwulf refers to superhero_name = 'Blackwulf';", - "SQL": "SELECT T2.colour FROM superhero AS T1 INNER JOIN colour AS T2 ON T1.eye_colour_id = T2.id WHERE T1.superhero_name = 'Blackwulf'", - "difficulty": "simple" - }, - { - "question_id": 845, - "db_id": "superhero", - "question": "List the power of superheroes with height greater than 80% of the average height of all superheroes.", - "evidence": "power of superheroes refers to power_name; height greater than 80% of the average height of all superheroes = height_cm > MULTIPLY(AVG(height_cm), 0.8);", - "SQL": "SELECT T3.power_name FROM superhero AS T1 INNER JOIN hero_power AS T2 ON T1.id = T2.hero_id INNER JOIN superpower AS T3 ON T2.power_id = T3.id WHERE T1.height_cm * 100 > ( SELECT AVG(height_cm) FROM superhero ) * 80", - "difficulty": "moderate" - } -] \ No newline at end of file diff --git a/examples/__init__.py b/examples/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/examples/config.py b/examples/config.py deleted file mode 100644 index fbd69dd0..00000000 --- a/examples/config.py +++ /dev/null @@ -1,20 +0,0 @@ -from paths import PATH_ROOT -from pydantic_settings import BaseSettings - - -class CoreConfig(BaseSettings): - """db-ally configuration""" - - pg_connection_string: str = "" - openai_api_key: str = "" - anyscale_api_key: str = "" - - class Config: - """Config for env class.""" - - env_file = str(PATH_ROOT / ".env") - env_file_encoding = "utf-8" - extra = "allow" - - -config = CoreConfig() diff --git a/examples/paths.py b/examples/paths.py deleted file mode 100644 index de76ac8b..00000000 --- a/examples/paths.py +++ /dev/null @@ -1,6 +0,0 @@ -from pathlib import Path - -import dbally - -PATH_PACKAGE = Path(dbally.__file__).parent -PATH_ROOT = PATH_PACKAGE.parent.parent diff --git a/examples/superhero_example.py b/examples/superhero_example.py deleted file mode 100644 index cd0f0ec6..00000000 --- a/examples/superhero_example.py +++ /dev/null @@ -1,290 +0,0 @@ -# pylint: disable=missing-docstring, missing-return-doc, missing-param-doc, duplicate-code -import asyncio -import tempfile - -import sqlalchemy -from config import config -from sqlalchemy import create_engine -from sqlalchemy.dialects.postgresql import ARRAY -from sqlalchemy.ext.automap import automap_base -from sqlalchemy.orm import aliased -from typing_extensions import Annotated - -import dbally -from dbally import SqlAlchemyBaseView, decorators -from dbally.audit.event_handlers.cli_event_handler import CLIEventHandler -from dbally.embedding_client.openai import OpenAiEmbeddingClient -from dbally.similarity.faiss_store import FaissStore -from dbally.similarity.index import SimilarityIndex -from dbally.similarity.sqlalchemy_base import CaseInsensitiveSqlAlchemyStore, SimpleSqlAlchemyFetcher - -engine = create_engine(config.pg_connection_string + "/superhero") -SuperheroModel = automap_base() -SuperheroModel.prepare(autoload_with=engine, reflect=True) - -eye_color_alias = aliased(SuperheroModel.classes.colour) -hair_color_alias = aliased(SuperheroModel.classes.colour) -skin_color_alias = aliased(SuperheroModel.classes.colour) - -hero_power = SuperheroModel.metadata.tables["hero_power"] -hero_attr = SuperheroModel.metadata.tables["hero_attribute"] - - -class SuperheroDBSchema: - id = SuperheroModel.classes.superhero.id - name = SuperheroModel.classes.superhero.superhero_name - full_name = SuperheroModel.classes.superhero.full_name - gender = SuperheroModel.classes.gender.gender - race = SuperheroModel.classes.race.race - publisher_name = SuperheroModel.classes.publisher.publisher_name - alignment = SuperheroModel.classes.alignment.alignment - weight_kg = SuperheroModel.classes.superhero.weight_kg - height_cm = SuperheroModel.classes.superhero.height_cm - eye_color = eye_color_alias.colour.label("eye_color") - hair_color = hair_color_alias.colour.label("hair_color") - skin_color = skin_color_alias.colour.label("skin_color") - powers = sqlalchemy.func.array_agg( - sqlalchemy.func.distinct(SuperheroModel.classes.superpower.power_name), type_=ARRAY(sqlalchemy.String) - ).label("powers") - attributes = sqlalchemy.func.jsonb_object_agg( - SuperheroModel.classes.attribute.attribute_name, hero_attr.c.attribute_value - ).label("attributes") - - -gender_similarity = SimilarityIndex( - store=CaseInsensitiveSqlAlchemyStore(engine, "gender_similarity"), - fetcher=SimpleSqlAlchemyFetcher( - engine, - table=SuperheroModel.classes.gender, - column=SuperheroModel.classes.gender.gender, - ), -) -color_similarity = SimilarityIndex( - store=FaissStore( - index_dir=f"{tempfile.gettempdir()}/dbally_hero_indexes", - index_name="color_similarity", - max_distance=0.8, - embedding_client=OpenAiEmbeddingClient( - api_key=config.openai_api_key, - ), - ), - fetcher=SimpleSqlAlchemyFetcher( - engine, - table=SuperheroModel.classes.colour, - column=SuperheroModel.classes.colour.colour, - ), -) - -Gender = Annotated[str, gender_similarity] - - -class SuperheroFilterMixin: - @decorators.view_filter() - def filter_by_superhero_name(self, name: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.superhero_name == name - - @decorators.view_filter() - async def filter_by_eye_color(self, color: str) -> sqlalchemy.ColumnElement: - color = await color_similarity.similar(color) - return SuperheroModel.classes.superhero.eye_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) - - @decorators.view_filter() - async def filter_by_hair_color(self, color: str) -> sqlalchemy.ColumnElement: - color = await color_similarity.similar(color) - return SuperheroModel.classes.superhero.hair_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) - - @decorators.view_filter() - async def filter_by_skin_color(self, color: str) -> sqlalchemy.ColumnElement: - color = await color_similarity.similar(color) - return SuperheroModel.classes.superhero.skin_colour_id.in_( - sqlalchemy.select(SuperheroModel.classes.colour.id).where(SuperheroModel.classes.colour.colour == color) - ) - - @decorators.view_filter() - def filter_by_race(self, race: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.race_id.in_( - sqlalchemy.select(SuperheroModel.classes.race.id).where(SuperheroModel.classes.race.race == race) - ) - - @decorators.view_filter() - def filter_by_publisher(self, publisher: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.publisher_id.in_( - sqlalchemy.select(SuperheroModel.classes.publisher.id).where( - SuperheroModel.classes.publisher.publisher == publisher - ) - ) - - @decorators.view_filter() - def filter_by_alignment(self, alignment: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.alignment_id.in_( - sqlalchemy.select(SuperheroModel.classes.alignment.id).where( - SuperheroModel.classes.alignment.alignment == alignment - ) - ) - - @decorators.view_filter() - def filter_by_gender(self, gender: Gender) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.gender_id.in_( - sqlalchemy.select(SuperheroModel.classes.gender.id).where(SuperheroModel.classes.gender.gender == gender) - ) - - @decorators.view_filter() - def heavier_than(self, weight: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg > weight - - @decorators.view_filter() - def lighter_than(self, weight: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.weight_kg < weight - - @decorators.view_filter() - def taller_than(self, height: float) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superhero.height_cm > height - - -class SuperheroView(SqlAlchemyBaseView, SuperheroFilterMixin): - """ - Main view, meant for finding superheroes meeting specific criteria - """ - - def __init__(self, sqlalchemy_engine: sqlalchemy.engine.Engine) -> None: - self._inner = sqlalchemy.select() - super().__init__(sqlalchemy_engine) - - def get_select(self) -> sqlalchemy.Select: - """ - Creates the initial SqlAlchemy select object, which will be used to build the query. - """ - return ( - sqlalchemy.select( - SuperheroDBSchema.id, - SuperheroDBSchema.name, - SuperheroDBSchema.full_name, - SuperheroDBSchema.gender, - SuperheroDBSchema.race, - SuperheroDBSchema.publisher_name, - SuperheroDBSchema.alignment, - SuperheroDBSchema.weight_kg, - SuperheroDBSchema.height_cm, - SuperheroDBSchema.eye_color, - SuperheroDBSchema.hair_color, - SuperheroDBSchema.skin_color, - SuperheroDBSchema.powers, - SuperheroDBSchema.attributes, - ) - .join( - SuperheroModel.classes.gender, - SuperheroModel.classes.superhero.gender_id == SuperheroModel.classes.gender.id, - ) - .join( - SuperheroModel.classes.race, SuperheroModel.classes.superhero.race_id == SuperheroModel.classes.race.id - ) - .join( - SuperheroModel.classes.publisher, - SuperheroModel.classes.superhero.publisher_id == SuperheroModel.classes.publisher.id, - ) - .join( - SuperheroModel.classes.alignment, - SuperheroModel.classes.superhero.alignment_id == SuperheroModel.classes.alignment.id, - ) - .join(eye_color_alias, SuperheroModel.classes.superhero.eye_colour_id == eye_color_alias.id) - .join(hair_color_alias, SuperheroModel.classes.superhero.hair_colour_id == hair_color_alias.id) - .join(skin_color_alias, SuperheroModel.classes.superhero.skin_colour_id == skin_color_alias.id) - .join(hero_power, hero_power.c.hero_id == SuperheroModel.classes.superhero.id) - .join(SuperheroModel.classes.superpower, SuperheroModel.classes.superpower.id == hero_power.c.power_id) - .join(hero_attr, hero_attr.c.hero_id == SuperheroModel.classes.superhero.id) - .join(SuperheroModel.classes.attribute, SuperheroModel.classes.attribute.id == hero_attr.c.attribute_id) - .group_by( - SuperheroDBSchema.id, - SuperheroDBSchema.name, - SuperheroDBSchema.full_name, - SuperheroDBSchema.gender, - SuperheroDBSchema.race, - SuperheroDBSchema.publisher_name, - SuperheroDBSchema.alignment, - SuperheroDBSchema.weight_kg, - SuperheroDBSchema.height_cm, - SuperheroDBSchema.eye_color, - SuperheroDBSchema.hair_color, - SuperheroDBSchema.skin_color, - ) - ) - - @decorators.view_filter() - def has_power(self, power: str) -> sqlalchemy.ColumnElement: - return self._inner.c.powers.contains([power]) - - # - # - # @decorators.view_filter() - # def power_higher_than(self, power_level: int) -> sqlalchemy.ColumnElement: - # return self._inner.c.attributes["Power"] < power_level # TODO: this does not work for some reason - # - # - # @decorators.view_filter() - # def combat_higher_than(self, combat_level: int) -> sqlalchemy.ColumnElement: - # return self._inner.c.attributes["Combat"] < combat_level # TODO: this does not work for some reason - - -# todo: sometimes I use classes, sometimes metadata.tables, because some classes aren't automapped correctly. -# at some point we should either fix the automap or use metadata.tables everywhere -class SuperheroCountByPowerView(SqlAlchemyBaseView, SuperheroFilterMixin): - """ - View used to count the number of superheroes with a specific power. - """ - - def __init__(self, sqlalchemy_engine: sqlalchemy.engine.Engine) -> None: - self._superhero_count = sqlalchemy.func.count(SuperheroModel.classes.superhero.id).label("superhero_count") - self._hero_power = SuperheroModel.metadata.tables["hero_power"] - - super().__init__(sqlalchemy_engine) - - def get_select(self) -> sqlalchemy.Select: - """ - Creates the initial SqlAlchemy select object, which will be used to build the query. - """ - # TODO: this should use part of the main query instead of replicating joins - return ( - sqlalchemy.select( - SuperheroModel.classes.superpower.power_name, - self._superhero_count, - ) - .join(self._hero_power, self._hero_power.c.hero_id == SuperheroModel.classes.superhero.id) - .join( - SuperheroModel.classes.superpower, SuperheroModel.classes.superpower.id == self._hero_power.c.power_id - ) - .group_by(SuperheroModel.classes.superpower.power_name) - ) - - @decorators.view_filter() - def filter_by_power(self, power: str) -> sqlalchemy.ColumnElement: - return SuperheroModel.classes.superpower.power_name == power - - -async def main(): - # TODO: should be done periodically, not each time the file is run - await gender_similarity.update() - await color_similarity.update() - - dbally.use_openai_llm( - model_name="gpt-4", - openai_api_key=config.openai_api_key, # You can pass key directly or just have OPENAI_API_KEY env var defined. - ) - dbally.use_event_handler(CLIEventHandler()) - - superheros_db = dbally.create_collection("superheros_db") - superheros_db.add(SuperheroView, lambda: SuperheroView(engine)) - superheros_db.add(SuperheroCountByPowerView, lambda: SuperheroCountByPowerView(engine)) - - await superheros_db.ask( - 'What heroes have "blueish" eyes and are taller than 180.5cm?', return_natural_response=True - ) - - await superheros_db.ask("Count power of female heros", return_natural_response=True) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/infra/terraform/main.tf b/infra/terraform/main.tf deleted file mode 100644 index 42ced474..00000000 --- a/infra/terraform/main.tf +++ /dev/null @@ -1,49 +0,0 @@ -terraform { - backend "gcs" { - bucket = "db-ally-tfstate" - } -} - -locals { - region = "europe-west1" - - database_name = "db-ally-postgres" - zone = "${local.region}-d" -} - -module "database" { - source = "./modules/database" - db_name = local.database_name - region = local.region - project = var.project - database_names = var.database_names -} - -resource "google_project_service" "compute_api" { - project = var.project - service = "compute.googleapis.com" -} - -resource "google_storage_bucket" "docs_bucket" { - name = "db-ally-documentation" - location = "EU" - storage_class = "STANDARD" - - website { - main_page_suffix = "index.html" - not_found_page = "404.html" - } -} - -resource "google_storage_bucket_iam_member" "member" { - provider = google - bucket = google_storage_bucket.docs_bucket.name - role = "roles/storage.objectViewer" - member = "allUsers" -} - -output "conn_string_prefix" { - description = "PostgreSQL connection string without / at the end." - value = module.database.conn_string_prefix - sensitive = true -} \ No newline at end of file diff --git a/infra/terraform/modules/database/main.tf b/infra/terraform/modules/database/main.tf deleted file mode 100644 index db527a72..00000000 --- a/infra/terraform/modules/database/main.tf +++ /dev/null @@ -1,60 +0,0 @@ -variable "region" {} -variable "project" {} -variable "db_name" {} -variable "database_names" {} - -resource "google_sql_database_instance" "postgres" { - name = var.db_name - region = var.region - project = var.project - - database_version = "POSTGRES_15" - - settings { - tier = "db-f1-micro" - - ip_configuration { - ipv4_enabled = true - authorized_networks { - name = "IPv4 default route" - value = "0.0.0.0/0" - } - } - - database_flags { - name = "max_connections" - value = "250" - } - } -} - -resource "google_sql_database" "database" { - for_each = toset(var.database_names) - name = each.key - instance = google_sql_database_instance.postgres.name - charset = "UTF8" - project = var.project -} - -resource "google_sql_user" "user" { - name = "developer" - instance = google_sql_database_instance.postgres.name - password = random_password.password.result - project = var.project -} - -resource "random_password" "password" { - length = 16 - special = true - override_special = "#$%^&*()" -} - -resource "google_project_service" "cloud_sql_admin" { - project = var.project - service = "sqladmin.googleapis.com" -} - -output "conn_string_prefix" { - description = "PostgreSQL connection string without database name at the end." - value = "postgresql://${google_sql_user.user.name}:${google_sql_user.user.password}@localhost:5432" -} diff --git a/infra/terraform/providers.tf b/infra/terraform/providers.tf deleted file mode 100644 index 0cc10d4b..00000000 --- a/infra/terraform/providers.tf +++ /dev/null @@ -1,14 +0,0 @@ -terraform { - required_providers { - google = { - source = "hashicorp/google", - version = "~> 4.80.0" - } - } -} - -provider "google" { - project = var.project - region = local.region - zone = local.zone -} \ No newline at end of file diff --git a/infra/terraform/variables.tf b/infra/terraform/variables.tf deleted file mode 100644 index 0b6e0b4c..00000000 --- a/infra/terraform/variables.tf +++ /dev/null @@ -1,20 +0,0 @@ -variable "project" { - default = "ds-internal-db-ally" -} - -variable "database_names" { - default = [ - "california_schools", - "card_games", - "clothes_retail", - "codebase_community", - "debit_card_specializing", - "european_football_2", - "financial", - "formula_1", - "student_club", - "superhero", - "thrombosis_prediction", - "toxicology" - ] -} \ No newline at end of file From 1f89364ee593497894df669ebb6212a1d65a3483 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Hordy=C5=84ski?= Date: Wed, 3 Apr 2024 12:55:30 +0200 Subject: [PATCH 3/3] gh prep --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..a983c936 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2024 deepsense.ai + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file